Image garbage classification¶
This analysis is based on the images (a very small dataset) in the "img" folder of the datasets folder, linked below.
https://github.com/MEMAUDATA/memaudata.github.io/blob/main/datasets/img
Task: to classify, from an image, whether the garbage shown is clean or dirty.
- Create a manual neural network for this classification task
- Create a CNN using PyTorch
- Create a CNN using TensorFlow
First step: EDA to analyse the images in the train folder!
Example images :
Clean img: RGB, size (803, 600, 3). Grayscale seems OK. Dirty img: RGB, size (141, 250, 3). Grayscale seems OK.
=> The size of each image needs to be controlled.
Nb of images in test folder: 38. Nb of images in training folder: 99.
=> Training image sizes are not normally distributed (mean ≠ median).
=> Bimodal intensity distribution with two peaks: a small one around 150 and a big sharp one around 255. True for both types of images.
Author : NV, Toulouse, 01/2025
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageFilter
import os
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import tensorflow as tf
from tensorflow import keras
from tqdm import tqdm
from tqdm.keras import TqdmCallback
import nbconvert
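Reproducibility note: the models below start from random weight initialisations. A minimal sketch for fixing the seeds (an addition, not in the original notebook) would be:
SEED = 42
np.random.seed(SEED)      # NumPy: manual neuron initialisation
torch.manual_seed(SEED)   # PyTorch CNN weights
tf.random.set_seed(SEED)  # TensorFlow CNN weights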
EDA¶
# Display the two example img for better visualisation
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 8)) 
clean = Image.open('../datasets/img/clean.jpeg')
dirty = Image.open('../datasets/img/dirty.jpeg')
axes[0].imshow(clean)
axes[1].imshow(dirty)
axes[0].set_title("A clean garbage")
axes[1].set_title("A dirty garbage")
axes[0].axis("off")
axes[1].axis("off") 
plt.show()
clean = np.array(clean)
dirty = np.array(dirty)
print(f"Clean shape : {clean.shape}")
print(f"Dirty shape : {dirty.shape}")
Clean shape : (803, 600, 3)
Dirty shape : (141, 250, 3)
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(10, 8)) 
colors = ['red', 'green', 'blue']
for i, color in enumerate(colors):
        axes[0,0].hist(clean[:, :, i].flatten(), bins=256, range=[0, 256], color=color, alpha=0.7, label=color.capitalize())
        axes[0,1].hist(dirty[:, :, i].flatten(), bins=256, range=[0, 256], color=color ,alpha=0.7)
        axes[i+1,0].hist(clean[:, :, i].flatten(), bins=256, range=[0, 256], color=color ,alpha=0.7)
        axes[i+1,1].hist(dirty[:, :, i].flatten(), bins=256, range=[0, 256], color=color ,alpha=0.7)
        axes[i,0].set_ylabel("Nb pixels")
        axes[0,0].set_xlim([-10, 270])
        axes[0,1].set_xlim([-10, 270])
        axes[i+1,0].set_xlim([-10, 270])
        axes[i+1,1].set_xlim([-10, 270])
axes[3,0].set_ylabel("Nb pixels")
axes[0,0].set_title("Clean Garbage RGB Histograms")
axes[0,1].set_title("Dirty Garbage RGB Histograms")
axes[3,0].set_xlabel("Intensity range")
axes[3,1].set_xlabel("Intensity range")
axes[0,0].legend()
plt.tight_layout()
plt.show()
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 8)) 
clean = Image.open('../datasets/img/clean.jpeg').convert('L')
dirty = Image.open('../datasets/img/dirty.jpeg').convert('L')
clean = np.array(clean)
dirty = np.array(dirty)
print(f"Clean shape : {clean.shape}")
print(f"Dirty shape : {dirty.shape}")
axes[0,0].imshow(clean,cmap='gray')
axes[0,1].imshow(dirty,cmap='gray')
axes[0,0].set_title("Clean")
axes[0,1].set_title("Dirty")
axes[0,0].axis("off")
axes[0,1].axis("off") 
axes[1,0].hist(clean.flatten(), bins=256, range=[0, 256], color='gray')
axes[1,1].hist(dirty.flatten(), bins=256, range=[0, 256], color='gray')
axes[1,0].set_ylabel("Nb pixels")
axes[1,0].set_xlabel("Intensity range")
axes[1,1].set_xlabel("Intensity range")
axes[1,0].set_xlim([-10, 270])
axes[1,1].set_xlim([-10, 270])
plt.tight_layout()
plt.show()
Clean shape : (803, 600)
Dirty shape : (141, 250)
Based on the two example images, both the image sizes and the intensity histograms look very different!
Next, count the number of images in each folder (test and train) and analyse the training images.
# Training folder
train_clean_dir = os.path.abspath('../datasets/img/train/clean')
train_dirty_dir = os.path.abspath('../datasets/img/train/dirty')
# Test folder
test_clean_dir = os.path.abspath('../datasets/img/test/clean')
test_dirty_dir = os.path.abspath('../datasets/img/test/dirty')
Count the number of images in the test and training folders¶
def count_image_files(directory):
    return sum(1 for f in os.listdir(directory) if f.lower().endswith(('.jpg', '.jpeg')))
cnt_clean = count_image_files(train_clean_dir)
cnt_dirty = count_image_files(train_dirty_dir)
print(f"nb img training/clean folder : {cnt_clean}")
print(f"nb img training/dirty folder : {cnt_dirty}")
print(f"nb img total training folder : {cnt_dirty + cnt_clean}")
nb img training/clean folder : 55
nb img training/dirty folder : 44
nb img total training folder : 99
cnt_clean = count_image_files(test_clean_dir)
cnt_dirty = count_image_files(test_dirty_dir)
print(f"nb img test/clean folder : {cnt_clean}")
print(f"nb img test/dirty folder : {cnt_dirty}")
print(f"nb img total test folder : {cnt_dirty + cnt_clean}")
nb img test/clean folder : 19
nb img test/dirty folder : 19
nb img total test folder : 38
Display training images and compute size statistics¶
# Get a list of image filenames
clean_img = [f for f in os.listdir(train_clean_dir) if f.lower().endswith(('.jpg', '.jpeg'))]
dirty_img = [f for f in os.listdir(train_dirty_dir) if f.lower().endswith(('.jpg', '.jpeg'))]
# Create 2 subplots : clean and dirty
def display_img(img_list,folder_path,img_title,img_2_display=10):
    fig, axes = plt.subplots(nrows=(img_2_display+4)//5, ncols=5, figsize=(10, 6))
    axes = axes.flatten()
    for i in range(img_2_display):
        filename = img_list[i]
        img_path = os.path.join(folder_path, filename)
        img = Image.open(img_path)
        axes[i].imshow(img)
        axes[i].axis('off')
        axes[i].set_title(f"{img_title} - {i + 1}")
    plt.tight_layout()
    plt.show()
display_img(clean_img,train_clean_dir,"clean")
display_img(dirty_img,train_dirty_dir,"dirty")
def cnt_shape_img(img_list,folder_path):
    img_size = []
    for i in range(len(img_list)):
        filename = img_list[i]
        img_path = os.path.join(folder_path, filename)
        img = Image.open(img_path)
        img = np.array(img)
        img_size.append(img.shape)
    return  pd.DataFrame(img_size, columns=["X", "Y", "Z"])
c = cnt_shape_img(clean_img,train_clean_dir)
d = cnt_shape_img(dirty_img,train_dirty_dir)
df = pd.concat([c, d])
# Image size statistics: per class and overall
print(f"Clean :\n{c.agg(['median','mean', 'std', 'min', 'max'])}\n" )
print(f"Dirty :\n{d.agg(['median','mean', 'std', 'min','max'])}\n" )
print(f"All :\n{df.agg(['median','mean', 'std', 'min','max'])}\n" )
Clean :
                  X            Y    Z
median   600.000000   600.000000  3.0
mean     886.272727   853.654545  3.0
std      494.317092   503.283707  0.0
min      244.000000   187.000000  3.0
max     2448.000000  3264.000000  3.0
Dirty :
                  X            Y    Z
median   774.000000   600.000000  3.0
mean    1071.681818  1086.136364  3.0
std     1037.061227   988.328201  0.0
min      141.000000   140.000000  3.0
max     4160.000000  4160.000000  3.0
All :
                  X            Y    Z
median   675.000000   600.000000  3.0
mean     968.676768   956.979798  3.0
std      784.294234   762.656166  0.0
min      141.000000   140.000000  3.0
max     4160.000000  4160.000000  3.0
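The intro notes a bimodal intensity distribution (peaks near 150 and 255). A quick check of that claim (a sketch added here, not part of the original run) is to aggregate the grayscale intensities of all training images into a single histogram:
# Sketch: aggregate grayscale pixel intensities over all training images
all_pixels = []
for folder, files in [(train_clean_dir, clean_img), (train_dirty_dir, dirty_img)]:
    for filename in files:
        arr = np.array(Image.open(os.path.join(folder, filename)).convert('L'))
        all_pixels.append(arr.flatten())
all_pixels = np.concatenate(all_pixels)
plt.hist(all_pixels, bins=256, range=[0, 256], color='gray')
plt.xlabel("Intensity range")
plt.ylabel("Nb pixels")
plt.title("Aggregated grayscale histogram (training set)")
plt.show()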
Preprocessing¶
def get_image_paths(directory):
    return [os.path.join(directory, f) for f in os.listdir(directory) if f.lower().endswith(('.jpg', '.jpeg'))]
# Gather image filenames from both directories
train_clean_dir = os.path.abspath('../datasets/img/train/clean')
train_dirty_dir = os.path.abspath('../datasets/img/train/dirty')
train_clean_img = get_image_paths(train_clean_dir)
train_dirty_img = get_image_paths(train_dirty_dir)
# Combine the lists
all_train_images = train_clean_img + train_dirty_img
# Gather image filenames from both directories
test_clean_dir = os.path.abspath('../datasets/img/test/clean')
test_dirty_dir = os.path.abspath('../datasets/img/test/dirty')
test_clean_img = get_image_paths(test_clean_dir)
test_dirty_img = get_image_paths(test_dirty_dir)
# Combine the lists
all_test_images = test_clean_img + test_dirty_img
Resize / Normalise / Flatten images¶
def preprocessing_image(image_path, target_size=(32,32), grayscale=True, flat=True): # median original sizes are ~675 x 600; images are resized to target_size
    
    # Convert into grayscale
    if grayscale:
        img = Image.open(image_path).convert('L')
    else :
        img = Image.open(image_path).convert('RGB')
    img = img.resize(target_size, Image.Resampling.LANCZOS)
    img_array = np.array(img).astype(np.float32)
    
    # Normalize the image
    img_array /= 255.0 
    
    if flat:
        img_array = img_array.flatten()
    return img_array
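A quick sanity check of the helper on the example image used earlier (illustrative only, not part of the original pipeline):
sample_flat = preprocessing_image('../datasets/img/clean.jpeg')              # grayscale, flattened
sample_2d   = preprocessing_image('../datasets/img/clean.jpeg', flat=False)  # grayscale, kept 2D
print(sample_flat.shape, sample_flat.min(), sample_flat.max())  # expected: (1024,) with values in [0, 1]
print(sample_2d.shape)                                          # expected: (32, 32)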
Preprocess images for the manual neural network¶
train_preprocessed_images   = []
test_preprocessed_images    = []
# Train imgs
for image_path in all_train_images:
    preprocessed_img = preprocessing_image(image_path)
    if preprocessed_img is not None:
        train_preprocessed_images.append(preprocessed_img)
# Test imgs
for image_path in all_test_images:
    
    preprocessed_img = preprocessing_image(image_path)
    if preprocessed_img is not None:
        test_preprocessed_images.append(preprocessed_img)        
X_train = np.array(train_preprocessed_images)
X_test  = np.array(test_preprocessed_images)
y_train = np.array([1]*len(train_clean_img) +[0]*len(train_dirty_img)) 
y_test = np.array([1] * len(test_clean_img) + [0] * len(test_dirty_img))
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)
print(X_train.shape,X_test.shape)
print(y_train.shape,y_test.shape)      
(99, 1024) (38, 1024)
(99, 1) (38, 1)
Preprocess images for the CNNs with PyTorch and TensorFlow¶
train_preprocessed_images_cnn = []
test_preprocessed_images_cnn  = []
# Train img
for image_path in all_train_images:
    preprocessed_img = preprocessing_image(image_path,grayscale=True,flat=False) # Keep 2D images
    if preprocessed_img is not None:
        train_preprocessed_images_cnn.append(preprocessed_img)
# Test img
for image_path in all_test_images:
    preprocessed_img = preprocessing_image(image_path,grayscale=True,flat=False) # Keep 2D images
    if preprocessed_img is not None:
        test_preprocessed_images_cnn.append(preprocessed_img)
X_train_cnn = np.array(train_preprocessed_images_cnn)
X_test_cnn  = np.array(test_preprocessed_images_cnn)
y_train_cnn = np.array([1]*len(train_clean_img) +[0]*len(train_dirty_img)) 
y_test_cnn = np.array([1] * len(test_clean_img) + [0] * len(test_dirty_img))
y_train_cnn = y_train_cnn.reshape(-1, 1)
y_test_cnn = y_test_cnn.reshape(-1, 1)
print(X_train_cnn.shape,X_test_cnn.shape)
print(y_train_cnn.shape,y_test_cnn.shape)   
(99, 32, 32) (38, 32, 32)
(99, 1) (38, 1)
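With only 99 training images, the dataset is tiny. A minimal data-augmentation sketch (an assumption, not applied to the models below) would be to add horizontally flipped copies of the training images:
# Sketch only: horizontally flipped copies double the training set
X_train_aug = np.concatenate([X_train_cnn, X_train_cnn[:, :, ::-1]], axis=0)
y_train_aug = np.concatenate([y_train_cnn, y_train_cnn], axis=0)
print(X_train_aug.shape, y_train_aug.shape)  # expected: (198, 32, 32) (198, 1)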
Neural network for classification¶
class artificial_neuron:
    def __init__(self,n_iter= 100,learning_rate = 0.1,penalty ='l2',alpha=0.01):
        self.coef_ = None # W
        self.bias_ = None #b
        self.n_iter_ = n_iter
        self.learning_rate_ = learning_rate
        self.loss_ = []
        self.accuracy_ = []
        self.test_accuracy_ = []
        self.penalty= penalty
        self.alpha = alpha
    def predict_proba(self,X):
        Z = X.dot(self.coef_) + self.bias_
        Z = np.clip(Z, -500, 500) # clip Z to avoid overflow in np.exp
        return  1 / ( 1 + np.exp(-Z))
    
    def log_loss(self,y,A):
        return 1 / len(y) * np.sum(-y * np.log(A + 1e-15) - (1 - y) * np.log(1 - A + 1e-15)) # epsilon avoids log(0)
         
    def predict(self,X):
        A = self.predict_proba(X)
        return A > 0.5
    
    def evaluate(self, X, y):
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)
    def display_loss_acc(self):
        fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))
        axes[0].plot(self.loss_, label='Loss')
        axes[1].plot(self.accuracy_, label='Training Accuracy',color='orange')
        axes[1].plot(self.test_accuracy_, label=(f'Final test Accuracy: {self.test_accuracy_[-1]:.2f}'), color = "green")
        axes[0].set_xlabel('Iterations')
        axes[0].legend()
        axes[1].set_xlabel('Iterations')
        axes[1].legend()
        plt.show()
   
    def fit(self,X,y,X_test,y_test):
        # Initialisation
        self.coef_ = np.random.rand(X.shape[1],1)
        self.bias_  = np.random.randn(1)
        self.loss_ = []
        # Training loop
        for i in tqdm(range(self.n_iter_), total=self.n_iter_):
            # Activations     
            A = self.predict_proba(X)
            # Loss
            self.loss_.append(self.log_loss(y,A))
            # Training Accuracy
            y_pred = self.predict(X)
            acc = accuracy_score(y, y_pred)
            self.accuracy_.append(acc)
            # Test Accuracy
            y_test_pred = self.predict(X_test)
            test_acc = accuracy_score(y_test, y_test_pred)
            self.test_accuracy_.append(test_acc)
            # Gradients    
            dW = 1 /len(y) * np.dot(X.T, A- y)
            db = 1 /len(y) * np.sum (A-y)
            # Add regularization terms to the gradients
            if self.penalty == 'l1':
                dW += self.alpha * np.sign(self.coef_)
            elif self.penalty == 'l2':
                dW += self.alpha * self.coef_
    
            self.coef_ = self.coef_ - self.learning_rate_ * dW
            self.bias_  = self.bias_ - self.learning_rate_ * db
           
model = artificial_neuron(n_iter= 4000,learning_rate = 0.01,penalty ='l2', alpha=0.1)
model.fit(X_train, y_train,X_test,y_test)
100%|██████████| 4000/4000 [00:08<00:00, 446.67it/s]
model.display_loss_acc()
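For reference, the loss minimised in `fit` is the binary cross-entropy
$$\mathcal{L} = -\frac{1}{m}\sum_{i=1}^{m}\left[y_i \log A_i + (1 - y_i)\log(1 - A_i)\right],$$
and the gradients used for the updates (with the optional penalty added to $dW$) are
$$dW = \frac{1}{m} X^\top (A - y) + \alpha\,\Omega'(W), \qquad db = \frac{1}{m}\sum_{i=1}^{m}(A_i - y_i),$$
where $\Omega'(W)$ is $\operatorname{sign}(W)$ for the L1 penalty and $W$ for the L2 penalty, and $m$ is the number of training samples.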
Conclusion:¶
The model has learned, but the test accuracy is low.
More data is needed to improve performance.
Convolutional Neural Network with PyTorch¶
# Warning: PyTorch expects (num_samples, channels, height, width)
# Grayscale images here: add the missing channel dimension
X_train_py = np.expand_dims(X_train_cnn, axis=1) if X_train_cnn.ndim == 3 else X_train_cnn
X_test_py  = np.expand_dims(X_test_cnn, axis=1) if X_test_cnn.ndim == 3 else X_test_cnn
X_train_tensor = torch.tensor(X_train_py, dtype=torch.float32)  
y_train_tensor = torch.tensor(y_train_cnn, dtype=torch.float32).view(-1, 1)  
X_test_tensor = torch.tensor(X_test_py, dtype=torch.float32)  
y_test_tensor = torch.tensor(y_test_cnn, dtype=torch.float32).view(-1, 1)  
# Create a TensorDataset and DataLoader
batch_size          = 32
train_dataset       = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset        = TensorDataset(X_test_tensor, y_test_tensor)
train_dataloader    = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader     = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# Define the CNN model
class garbageCNN(nn.Module):
    def __init__(self):
        super(garbageCNN, self).__init__()
        self.conv1      = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu       = nn.ReLU()
        self.maxpool    = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2      = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.fc1        = nn.Linear(32 * (X_train_py.shape[2] // 4) * (X_train_py.shape[3] // 4), 1) # two 2x2 max-pools divide height and width by 4
        self.sigmoid    = nn.Sigmoid()
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc1(x)
        x = self.sigmoid(x)
        return x
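A quick shape check (added sketch, not in the original): pushing a dummy batch through the network confirms that the flattened size expected by fc1 matches the actual feature map.
with torch.no_grad():
    dummy = torch.zeros(1, 1, X_train_py.shape[2], X_train_py.shape[3])  # one grayscale 32x32 image
    print(garbageCNN()(dummy).shape)  # expected: torch.Size([1, 1])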
Train and evaluate the model¶
model = garbageCNN()
# Define loss function and optimizer
criterion = nn.BCELoss() 
optimizer = optim.Adam(model.parameters(),lr=0.01)#weight_decay =0.01
# Device handling
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Training loop
num_epochs          = 400
losses              = []
all_loss            = []
train_acc           = []
test_acc            = []
for epoch in tqdm(range(num_epochs)):
    epoch_train         = []
    epoch_test          = []
    
    model.train() 
    for images, labels in train_dataloader:
        images, labels = images.to(device), labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels.view(-1, 1))
        losses.append(loss.item())
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Training acc
        with torch.no_grad():
            # Pred: the model already applies a sigmoid, so threshold the outputs directly
            y_pred = (outputs > 0.5).cpu().numpy()
            y_true = labels.cpu().numpy()
            epoch_train.append(accuracy_score(y_true, y_pred))
    train_acc.append(np.mean(epoch_train))
    # Testing acc
    model.eval()
    with torch.no_grad():
        for images, labels in test_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Pred: outputs are already probabilities (sigmoid applied in the model)
            y_pred = (outputs > 0.5).cpu().numpy()
            y_true = labels.cpu().numpy()
            epoch_test.append(accuracy_score(y_true, y_pred))
        test_acc.append(np.mean(epoch_test))
    epoch_loss = sum(losses[-len(train_dataloader):]) / len(train_dataloader)
    all_loss.append(epoch_loss)
100%|██████████| 400/400 [00:46<00:00, 8.58it/s]
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))
axes[0].plot(all_loss, label='Loss')
axes[1].plot(train_acc, label='Training Accuracy',color='orange')
axes[1].plot(test_acc, color='green',label=f'Test Accuracy (final: {test_acc[-1]:.2f})')
axes[0].set_xlabel('Epochs')
axes[0].legend()
axes[1].set_xlabel('Epochs')
axes[1].legend(loc='lower left')
plt.show()
The model seems to learn from the training data, but the test accuracy is very low!
Convolutional Neural Network with TensorFlow¶
# Warning: TensorFlow expects (num_samples, height, width, channels)
# Reshape img for TensorFlow
X_train_cnn = X_train_cnn.reshape(X_train_cnn.shape[0], X_train_cnn.shape[1], X_train_cnn.shape[2], 1)
X_test_cnn = X_test_cnn.reshape(X_test_cnn.shape[0], X_test_cnn.shape[1], X_test_cnn.shape[2], 1)
# Define the CNN model
model = tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=(32, 32, 1)),  # Use Input layer to define input shape
        tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1, activation='sigmoid' ,kernel_regularizer=tf.keras.regularizers.l2(0.001))])
# Compile the model
# Adam adapts the learning rate during training
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
# Train the model
history = model.fit(X_train_cnn, y_train_cnn, 
                    epochs=600, 
                    validation_data=(X_test_cnn, y_test_cnn),
                    verbose=0, 
                    callbacks=[TqdmCallback(verbose=1)]) 
# Evaluate model
test_loss, test_acc = model.evaluate(X_test_cnn,  y_test_cnn)
2/2 [==============================] - 0s 7ms/step - loss: 0.8967 - accuracy: 0.6579
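The fixed 600 epochs are probably more than such a small dataset needs. A hedged alternative (an assumption, not used in the run above) is an early-stopping callback that halts training when the validation loss stops improving:
# Sketch only: stop when val_loss stops improving and keep the best weights
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)
# It could then be passed alongside TqdmCallback in the callbacks list of model.fit.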
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))
axes[0].plot(history.history['loss'], label='Training Loss')
axes[1].plot(history.history['accuracy'], label='Training Accuracy',color='orange')
axes[1].plot(history.history['val_accuracy'], label=f'Test Accuracy (final :{test_acc:.2f})',color ='green')
axes[0].set_xlabel('Epochs')
axes[0].legend()
axes[1].set_xlabel('Epochs')
axes[1].legend()
plt.show()
Conclusion¶
Three neural networks were tested on this small dataset!
Main conclusion: a simple linear classifier might be the best solution here. Do not underestimate it! (A quick baseline sketch follows below.)
The three architectures are very different, and each one needs its own fine-tuning.
Each technique reached a test accuracy of close to 0.61.
What to do for better performance:
- Add more data or use data augmentation
- If no more data is available, try adding an L1 or L2 penalty (already implemented)
- Effect of the sharp intensity peak around 255? Needs further investigation.
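As a quick illustration of the "simple linear classifier" point above, here is a baseline sketch (an addition: it uses scikit-learn's LogisticRegression on the flattened 32x32 grayscale images already stored in X_train / X_test; the hyperparameters are illustrative):
from sklearn.linear_model import LogisticRegression

# Sketch: logistic regression baseline on the flattened grayscale images
baseline = LogisticRegression(max_iter=1000)
baseline.fit(X_train, y_train.ravel())
print(f"Linear baseline test accuracy: {accuracy_score(y_test, baseline.predict(X_test)):.2f}")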