Problem Statement¶

In [ ]:
from tensorflow import keras
from tensorflow.keras import layers
import pathlib
from tensorflow.keras.utils import image_dataset_from_directory

import pandas as pd
import pathlib
from pathlib import Path

import numpy as np
import pandas as pd

# plotting modules
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import plotly as plotly
plotly.offline.init_notebook_mode()

from tensorflow import keras
from tensorflow.keras import layers

import tensorflow as tf
from keras.utils import to_categorical
from keras.models import load_model

import logging
import warnings

# Suppress TensorFlow warnings
tf.get_logger().setLevel(logging.ERROR)

# Suppress specific warnings
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
warnings.filterwarnings('ignore', category=FutureWarning, module='tensorflow')

import plotly.graph_objects as go
from tensorflow.keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, precision_recall_curve, ConfusionMatrixDisplay
In [ ]:
tf.config.list_physical_devices('GPU')
Out[ ]:
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
In [ ]:
data_folder = pathlib.Path("../../../../../Downloads/archive/plantnet_300K")
In [ ]:
train_path = data_folder / "images_train_over_500"
val_path = data_folder / "images_val_over_500"
test_path = data_folder / "images_test_over_500"
In [ ]:
plant_folders = [x for x in train_path.glob('*') if x.is_dir()]
plant_folders = [x.name for x in plant_folders]
plant_folders[:10]
Out[ ]:
['1355932',
 '1355936',
 '1355937',
 '1355978',
 '1355990',
 '1356022',
 '1356075',
 '1356111',
 '1356126',
 '1356257']
In [ ]:
len(plant_folders)
Out[ ]:
108
In [ ]:
# how many images are in each folder
plant_count = {}
for plant in plant_folders:
    plant_count[plant] = len(list(train_path.glob(f'{plant}/*')))
    
plant_count = {k: v for k, v in sorted(plant_count.items(), key=lambda item: item[1], reverse=True)}
plant_count
Out[ ]:
{'1363227': 7208,
 '1392475': 6337,
 '1356022': 6140,
 '1364099': 5334,
 '1355937': 5178,
 '1359517': 5063,
 '1357330': 4837,
 '1358752': 4502,
 '1359620': 4285,
 '1363128': 4005,
 '1363991': 3862,
 '1355936': 3419,
 '1394460': 3388,
 '1363740': 3353,
 '1394994': 3183,
 '1364173': 3031,
 '1359616': 2811,
 '1364164': 2788,
 '1361824': 2739,
 '1361823': 2738,
 '1397364': 2700,
 '1358095': 2468,
 '1363130': 2448,
 '1389510': 2385,
 '1374048': 2330,
 '1367432': 2245,
 '1409238': 2241,
 '1397268': 2200,
 '1393614': 2101,
 '1356781': 2007,
 '1369887': 1952,
 '1393241': 1941,
 '1394420': 1899,
 '1398178': 1779,
 '1408774': 1776,
 '1435714': 1762,
 '1394591': 1757,
 '1385937': 1730,
 '1355932': 1716,
 '1358094': 1700,
 '1393425': 1685,
 '1393423': 1671,
 '1398592': 1597,
 '1408961': 1578,
 '1358133': 1570,
 '1358766': 1534,
 '1361656': 1503,
 '1384485': 1440,
 '1356257': 1379,
 '1358689': 1359,
 '1394382': 1348,
 '1359498': 1319,
 '1362490': 1303,
 '1357635': 1291,
 '1355990': 1224,
 '1363336': 1181,
 '1396824': 1118,
 '1400100': 1070,
 '1418146': 1056,
 '1356075': 1040,
 '1356382': 1031,
 '1360978': 1030,
 '1363764': 1028,
 '1394454': 1022,
 '1364159': 1007,
 '1393393': 968,
 '1362294': 934,
 '1369960': 923,
 '1409295': 923,
 '1359669': 903,
 '1355978': 891,
 '1391483': 889,
 '1394404': 873,
 '1398515': 835,
 '1356111': 823,
 '1360671': 794,
 '1391192': 784,
 '1390637': 748,
 '1359625': 744,
 '1364172': 742,
 '1360998': 740,
 '1391652': 732,
 '1360588': 730,
 '1358605': 728,
 '1359488': 723,
 '1361759': 710,
 '1356126': 704,
 '1391226': 681,
 '1360153': 677,
 '1398128': 663,
 '1358751': 661,
 '1360590': 661,
 '1359485': 648,
 '1394489': 646,
 '1393792': 630,
 '1363737': 622,
 '1358105': 620,
 '1421021': 608,
 '1357677': 571,
 '1363749': 566,
 '1356421': 550,
 '1363490': 543,
 '1420863': 536,
 '1363699': 528,
 '1358150': 520,
 '1397420': 511,
 '1418547': 504,
 '1392695': 502}
In [ ]:
# how many folders have more than 500 images
len([k for k, v in plant_count.items() if v > 500])
Out[ ]:
108
In [ ]:
fig, axes = plt.subplots(10, 5, figsize=(15, 15))
axes = axes.ravel()

for i in range(50):
    plant = plant_folders[i // 5]
    img_files = list(train_path.glob(f'{plant}/*'))
    
    if len(img_files) > i % 5:
        img_path = img_files[i % 5]
        img = plt.imread(img_path)
        axes[i].imshow(img)
        axes[i].axis('off')
        axes[i].set_title(plant)
    else:
        axes[i].axis('off')
        axes[i].set_title(f'{plant} (No Image)')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
image_size = (180, 180)
batch_size = 32

train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_path,
    # shuffle=False,
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    val_path,
    # shuffle=False,
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)

test_dataset = tf.keras.utils.image_dataset_from_directory(
    test_path,
    # shuffle=False,
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)
Found 188011 files belonging to 108 classes.
Found 23571 files belonging to 108 classes.
Found 23565 files belonging to 108 classes.
In [ ]:
class_names = test_dataset.class_names
num_classes = len(class_names)
In [ ]:
#normalization
normalization_layer = layers.Rescaling(1./255)
In [ ]:
from tensorflow.keras.applications import DenseNet169
from tensorflow.keras import layers, models

base_model = DenseNet169(weights='imagenet', include_top=False, input_shape=(180, 180, 3))
In [ ]:
data_augmentation = tf.keras.Sequential([
    layers.experimental.preprocessing.RandomFlip("horizontal"),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
    layers.experimental.preprocessing.RandomContrast(0.1)
])
In [ ]:
model = tf.keras.Sequential([
    # Data augmentation layer
    layers.Input(shape=(180, 180, 3)),
    normalization_layer,
    data_augmentation,
    
    # Base model (DenseNet169)
    base_model,
    
    # Additional layers
    layers.BatchNormalization(),
    layers.GlobalAveragePooling2D(),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.5),  # Regularization
    layers.Dense(num_classes, activation='softmax')
])
In [ ]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)


model.compile(optimizer=optimizer,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
optimizer.lr.assign(0.0001)
Out[ ]:
<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=1e-04>
In [ ]:
model.summary()
Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 rescaling_3 (Rescaling)     (None, 180, 180, 3)       0         
                                                                 
 sequential_6 (Sequential)   (None, 180, 180, 3)       0         
                                                                 
 densenet169 (Functional)    (None, 5, 5, 1664)        12642880  
                                                                 
 batch_normalization_3 (Batc  (None, 5, 5, 1664)       6656      
 hNormalization)                                                 
                                                                 
 global_average_pooling2d_3   (None, 1664)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_6 (Dense)             (None, 1024)              1704960   
                                                                 
 dropout_3 (Dropout)         (None, 1024)              0         
                                                                 
 dense_7 (Dense)             (None, 108)               110700    
                                                                 
=================================================================
Total params: 14,465,196
Trainable params: 14,303,468
Non-trainable params: 161,728
_________________________________________________________________
In [ ]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="./models/densenet_with_augmentation_newer.keras",
        save_best_only=True,
        monitor="val_loss")
]
In [ ]:
epochs=10

history = model.fit(
  train_dataset,
  validation_data=validation_dataset,
  epochs=epochs,
  callbacks=callbacks
)
Epoch 1/10
5876/5876 [==============================] - 2673s 453ms/step - loss: 1.1688 - accuracy: 0.6882 - val_loss: 0.8507 - val_accuracy: 0.7524
Epoch 2/10
5876/5876 [==============================] - 2701s 460ms/step - loss: 0.6733 - accuracy: 0.7998 - val_loss: 0.7690 - val_accuracy: 0.7746
Epoch 3/10
5876/5876 [==============================] - 2697s 459ms/step - loss: 0.5577 - accuracy: 0.8295 - val_loss: 0.6638 - val_accuracy: 0.8047
Epoch 4/10
5876/5876 [==============================] - 2694s 458ms/step - loss: 0.4871 - accuracy: 0.8469 - val_loss: 0.7248 - val_accuracy: 0.7976
Epoch 5/10
5876/5876 [==============================] - 2698s 459ms/step - loss: 0.4392 - accuracy: 0.8598 - val_loss: 0.6050 - val_accuracy: 0.8272
Epoch 6/10
5876/5876 [==============================] - 2698s 459ms/step - loss: 0.4024 - accuracy: 0.8705 - val_loss: 0.6410 - val_accuracy: 0.8174
Epoch 7/10
5876/5876 [==============================] - 2695s 459ms/step - loss: 0.3709 - accuracy: 0.8782 - val_loss: 0.7701 - val_accuracy: 0.7919
Epoch 8/10
5876/5876 [==============================] - 2693s 458ms/step - loss: 0.3453 - accuracy: 0.8855 - val_loss: 0.6142 - val_accuracy: 0.8261
Epoch 9/10
5876/5876 [==============================] - 2684s 457ms/step - loss: 0.3263 - accuracy: 0.8910 - val_loss: 0.6070 - val_accuracy: 0.8297
Epoch 10/10
5876/5876 [==============================] - 2673s 455ms/step - loss: 0.3067 - accuracy: 0.8959 - val_loss: 0.6154 - val_accuracy: 0.8283
In [ ]:
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")
737/737 [==============================] - 50s 68ms/step - loss: 0.5948 - accuracy: 0.8283
Test accuracy: 0.828
In [ ]:
best_model = load_model("./models/densenet_with_augmentation_newer.keras")
best_model.evaluate(test_dataset)
737/737 [==============================] - 50s 65ms/step - loss: 0.5832 - accuracy: 0.8328
Out[ ]:
[0.5832472443580627, 0.8328453302383423]

Test dataset accuracy of 83.28%.