#!/usr/bin/env python2 # -*- coding: utf-8 -*- # Code for running the experiments of the paper presented below. # If this code is used or adapted, please cite the paper: ''' Zacarias, A., Alexandre, L.A., "SeNA-CNN: Overcoming Catastrophic Forgetting in Convolutional Neural Networks by Selective Network Augmentation", 8th IAPR TC3 Workshop on Artificial Neural Networks in Pattern Recognition. Springer LNAI. Siena, Italy, September 19-21, 2018. ''' # bibtex: ''' @InProceedings{zacarias2018a, author = {Zacarias, A. and Alexandre, {L.A.}}, title = {SeNA-CNN: Overcoming Catastrophic Forgetting in Convolutional Neural Networks by Selective Network Augmentation}, booktitle = {8th IAPR TC3 Workshop on Artificial Neural Networks in Pattern Recognition}, year = {2018}, publisher = {Springer}, } ''' # 2018-06-30 # requirements: Tensorflow, keras, nvidia GPU # License: ''' MIT License Copyright (c) 2018 Abel Zacarias and Luís A. Alexandre Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
''' from __future__ import absolute_import from __future__ import print_function from fuel import config # Use this line only if you are using the datasets from kerosene. You can find the dataset at: https://github.com/dribnet/kerosene config.data_path = 'path/to/data/set' import os import keras import itertools import numpy as np np.random.seed(1337) # for reproducibility from keras.models import Sequential , Model, load_model, model_from_json from keras.layers.core import Dense, Dropout, Activation, Flatten from keras.layers.convolutional import Conv2D, MaxPooling2D from keras.utils import np_utils from matplotlib import pyplot as plt from keras.layers import Input, Dense, merge from keras.optimizers import SGD, Adadelta, Adagrad from keras.utils import plot_model import keras.backend as K K.set_image_dim_ordering('th') batch_size = 32 nb_epoch = 12 # Initialising the CNN for first task (old) model1 = Sequential() #Step 1 Conv1. Here we use image 32x32 model1.add(Conv2D(32, (3, 3), padding='same', input_shape=())) # Step 2- Activation 1 model1.add(Activation('relu')) # Step 3 - Conv2 model1.add(Conv2D(32, (3, 3))) # Step 4- Activation 2 model1.add(Activation('relu')) # Step 5 - Pooling 1 model1.add(MaxPooling2D(pool_size=(2, 2))) # Step 6 - Dropout 1 model1.add(Dropout(0.25)) # Step 7 - Conv3 model1.add(Conv2D(64, (3, 3), padding='same')) # Step 8- Activation 3 model1.add(Activation('relu')) # Step 9 - Conv4 model1.add(Conv2D(64, (3, 3))) # Step 10- Activation 4 model1.add(Activation('relu')) # Step 11 - Pooling 2 model1.add(MaxPooling2D(pool_size=(2, 2))) # Step 12 - Dropout 2 model1.add(Dropout(0.25)) # Step 13- Flattening model1.add(Flatten()) # Step 14 - Full connection 1 model1.add(Dense(512)) # Step 15 - ctivation 5 model1.add(Activation('relu')) # Step 16 - Dropout 3 model1.add(Dropout(0.5)) # Step 17 - Full connection 2 model1.add(Dense(nb_classes)) # Step 18 - Softmax model1.add(Activation('softmax')) # Compiling the model with sgd. 
# Optimiser and loss for the first-task model (model1).
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model1.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Part 2 - Fitting the images.
model1.fit(X_train, Y_train,
           batch_size=batch_size,
           epochs=nb_epoch,
           verbose=1,
           validation_data=(X_test, Y_test))

# Evaluating the model on isolated learning.
score = model1.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

# ==============================================================================
# SECOND TRAINING PROCEDURE FOR NEW TASKS
# Initialising the second sequential model as new task 1. It reuses the first
# two convolutional layers of model1: they are frozen here and receive
# model1's weights after the model is built.
# NOTE(review): X_train/Y_train/X_test/Y_test are presumably swapped for the
# new task's data before model2 is fitted - confirm, nothing here does it.
model2 = Sequential()
# Step 1 - Conv1. Images are 3x32x32 (channels first).
model2.add(Conv2D(32, (3, 3), padding='same', input_shape=(3, 32, 32)))
# Freeze the first convolutional layer.
model2.layers[0].trainable = False
# Step 2 - Activation 1
model2.add(Activation('relu'))
# Step 3 - Conv2
model2.add(Conv2D(32, (3, 3)))
# Freeze the second convolutional layer. It sits at index 2 (index 1 is the
# activation), the same index used for the weight copy from model1.
model2.layers[2].trainable = False
# Step 4 - Activation 2
model2.add(Activation('relu'))
# Step 5 - Pooling 1
model2.add(MaxPooling2D(pool_size=(2, 2)))
# Step 6 - Dropout 1
model2.add(Dropout(0.25))
# Step 7 - Conv3
model2.add(Conv2D(64, (3, 3), padding='same'))
# Step 8 - Activation 3
model2.add(Activation('relu'))
# Step 9 - Conv4
model2.add(Conv2D(64, (3, 3)))
# Step 10 - Activation 4
model2.add(Activation('relu'))
# Step 11 - Pooling 2
model2.add(MaxPooling2D(pool_size=(2, 2)))
# Step 12 - Dropout 2
model2.add(Dropout(0.25))
# Step 13 - Flattening
model2.add(Flatten())
# Step 14 - Full connection 1
model2.add(Dense(512))
# Step 15 - Activation 5
model2.add(Activation('relu'))
# Step 16 - Dropout 3
model2.add(Dropout(0.5))
# Step 17 - Full connection 2
model2.add(Dense(nb_classes))
# Step 18 - Softmax
model2.add(Activation('softmax'))

# Copying weights from model1 to model2 (first convolutional layer).
model2.layers[0].set_weights(model1.layers[0].get_weights())
# Second shared convolutional layer: copy model1's weights into model2.
model2.layers[2].set_weights(model1.layers[2].get_weights())

# ==============================================================================
# Compiling the model with sgd.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model2.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# ==============================================================================
model2.fit(X_train, Y_train,
           batch_size=batch_size,
           epochs=nb_epoch,
           verbose=1,
           validation_data=(X_test, Y_test))

score = model2.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

# ==============================================================================
# THIRD TRAINING PROCEDURE FOR NEW TASK2
# Initialising the third sequential model as new task 2. It reuses the first
# two convolutional layers of model1: they are frozen here and receive
# model1's weights after the model is built.
# NOTE(review): X_train/Y_train/X_test/Y_test are presumably swapped for the
# new task's data before model3 is fitted - confirm, nothing here does it.
model3 = Sequential()
# Step 1 - Conv1. Images are 3x32x32 (channels first).
model3.add(Conv2D(32, (3, 3), padding='same', input_shape=(3, 32, 32)))
# Freeze the first convolutional layer.
model3.layers[0].trainable = False
# Step 2 - Activation 1
model3.add(Activation('relu'))
# Step 3 - Conv2
model3.add(Conv2D(32, (3, 3)))
# Freeze the second convolutional layer. It sits at index 2 (index 1 is the
# activation), the same index used for the weight copy from model1.
model3.layers[2].trainable = False
# Step 4 - Activation 2
model3.add(Activation('relu'))
# Step 5 - Pooling 1
model3.add(MaxPooling2D(pool_size=(2, 2)))
# Step 6 - Dropout 1
model3.add(Dropout(0.25))
# Step 7 - Conv3
model3.add(Conv2D(64, (3, 3), padding='same'))
# Step 8 - Activation 3
model3.add(Activation('relu'))
# Step 9 - Conv4
model3.add(Conv2D(64, (3, 3)))
# Step 10 - Activation 4
model3.add(Activation('relu'))
# Step 11 - Pooling 2
model3.add(MaxPooling2D(pool_size=(2, 2)))
# Step 12 - Dropout 2
model3.add(Dropout(0.25))
# Step 13 - Flattening
model3.add(Flatten())
# Step 14 - Full connection 1
model3.add(Dense(512))
# Step 15 - Activation 5
model3.add(Activation('relu'))
# Step 16 - Dropout 3
model3.add(Dropout(0.5))
# Step 17 - Full connection 2
# Classification head of model3 (full connection 2).
model3.add(Dense(nb_classes))
# Step 18 - Softmax
model3.add(Activation('softmax'))

# Copying weights from model1 to model3 (the two shared convolutional layers).
model3.layers[0].set_weights(model1.layers[0].get_weights())
model3.layers[2].set_weights(model1.layers[2].get_weights())

# ==============================================================================
# Compiling the model with sgd.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model3.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# ==============================================================================
model3.fit(X_train, Y_train,
           batch_size=batch_size,
           epochs=nb_epoch,
           verbose=1,
           validation_data=(X_test, Y_test))

score = model3.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

# ==============================================================================
# FOURTH PROCEDURE, CREATING THE FUNCTIONAL MODEL FOR THE THREE TASKS
# Branch 1 (first task). x1..x6 form the trunk that every branch starts from.
# The input tensor is 3x32x32 (channels first); the first layer must be called
# on `inputs`, the tensor later handed to Model(inputs=...).
inputs = Input(shape=(3, 32, 32))
x1 = Conv2D(32, (3, 3), padding='same', input_shape=(3, 32, 32))(inputs)
x2 = Activation('relu')(x1)
x3 = Conv2D(32, (3, 3))(x2)
x4 = Activation('relu')(x3)
x5 = MaxPooling2D(pool_size=(2, 2))(x4)
x6 = Dropout(0.25)(x5)
x7 = Conv2D(64, (3, 3), padding='same')(x6)
x8 = Activation('relu')(x7)
x9 = Conv2D(64, (3, 3))(x8)
x10 = Activation('relu')(x9)
x11 = MaxPooling2D(pool_size=(2, 2))(x10)
x12 = Dropout(0.25)(x11)
x13 = Flatten()(x12)
x14 = Dense(512)(x13)
x15 = Activation('relu')(x14)
x16 = Dropout(0.5)(x15)
x17 = Dense(nb_classes)(x16)
x18 = Activation('softmax')(x17)

# ==============================================================================
# Connecting the first branch for the first new task to a previous trained task
# Branch 2: starts from x6, the output of the shared trunk.
x19 = Conv2D(64, (3, 3), padding='same')(x6)
x20 = Activation('relu')(x19)
x21 = Conv2D(64, (3, 3))(x20)
x22 = Activation('relu')(x21)
x23 = MaxPooling2D(pool_size=(2, 2))(x22)
x24 = Dropout(0.25)(x23)
x25 = Flatten()(x24)
x26 = Dense(512)(x25)
x27 = Activation('relu')(x26)
x28 = Dropout(0.5)(x27)
# Classification head of branch 2.
x29 = Dense(nb_classes)(x28)
x30 = Activation('softmax')(x29)

# ==============================================================================
# Connecting the second branch for the second new task to a previous trained
# task.
# Branch 3: starts from x6, the output of the shared trunk.
x31 = Conv2D(64, (3, 3), padding='same')(x6)
x32 = Activation('relu')(x31)
x33 = Conv2D(64, (3, 3))(x32)
x34 = Activation('relu')(x33)
x35 = MaxPooling2D(pool_size=(2, 2))(x34)
x36 = Dropout(0.25)(x35)
x37 = Flatten()(x36)
x38 = Dense(512)(x37)
x39 = Activation('relu')(x38)
x40 = Dropout(0.5)(x39)
x41 = Dense(nb_classes)(x40)
x42 = Activation('softmax')(x41)

# ==============================================================================
# One model, three softmax outputs, in the order branch 3, branch 2, branch 1.
predictions = [x42, x30, x18]
Final_model = Model(inputs=inputs, outputs=predictions)
plot_model(Final_model, to_file='Final_c.png')

# ==============================================================================
# Copying weights from the first task to the corresponding branch in the
# functional model. If you do not use the same architecture as ours, you have
# to be careful when setting the index of each conv and dense layer. Here we
# follow our architecture.
# Each pair is (layer index in Final_model, layer index in model1).
# NOTE(review): the Final_model indices depend on how Keras orders the layers
# of this graph - check them against Final_model.summary().
for functional_idx, sequential_idx in ((1, 0), (3, 2), (9, 6),
                                       (15, 8), (30, 13), (39, 16)):
    trained_weights = model1.layers[sequential_idx].get_weights()
    Final_model.layers[functional_idx].set_weights(trained_weights)

# Copying weights from the first new task to the corresponding branch in the
# functional model. This is the second branch in the functional model, and we
# do not copy the first two layers because they were already added to the
# first branch and all other branches share them.
# Second branch of the functional model <- model2 (first new task).
# Each pair is (layer index in Final_model, layer index in the source model).
for functional_idx, sequential_idx in ((8, 6), (14, 8), (29, 13), (38, 16)):
    Final_model.layers[functional_idx].set_weights(
        model2.layers[sequential_idx].get_weights())

# Copying weights from the second new task to the corresponding branch in the
# functional model. This is the third branch in the functional model.
for functional_idx, sequential_idx in ((7, 6), (13, 8), (28, 13), (37, 16)):
    Final_model.layers[functional_idx].set_weights(
        model3.layers[sequential_idx].get_weights())

# =============================================================================
# Compile the whole functional model after copying the weights.
Final_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# NOTE(review): size_of_all_task_data is not defined anywhere in this script;
# it has to exist before this point.

# Test data for task1 (old). This is the union of the three tasks, but the
# other tasks are set to zero.
# Next step: replace the zeros with the corresponding images of the test data
# for the first task.
X_test_task1 = np.zeros((len(size_of_all_task_data), 3, 32, 32))
# Next step: replace the zeros with the corresponding target values.
Y_test_task1 = np.zeros((len(size_of_all_task_data), nb_classes))

# Test data for new task1. This is the union of the three tasks, but the other
# tasks are set to zero. Do the same as for task1.
X_test_task2 = np.zeros((len(size_of_all_task_data), 3, 32, 32))
Y_test_task2 = np.zeros((len(size_of_all_task_data), nb_classes))

# Test data for new task2.
X_test_task3 = np.zeros((len(size_of_all_task_data), 3, 32, 32))
Y_test_task3 = np.zeros((len(size_of_all_task_data), nb_classes))

# Evaluate the functional model on the first task.
# Evaluate the functional model (Final_model) on each of the three tasks.
#
# Final_model was built with outputs=[x42, x30, x18], i.e. the outputs are
# ordered branch 3, branch 2, branch 1, so the targets are passed in that
# order too. evaluate() returns one flat list of scalars labelled by
# Final_model.metrics_names; the accuracy entries are looked up by label
# instead of by hard-coded position, so a loss value is never reported as an
# accuracy.
_accuracy_slots = [
    position
    for position, label in enumerate(Final_model.metrics_names)
    if label.endswith(('acc', 'accuracy'))
]
if len(_accuracy_slots) != 3:
    raise ValueError('expected one accuracy metric per output, got metrics: %s'
                     % (Final_model.metrics_names,))
# Accuracy entries appear in output order: branch 3, branch 2, branch 1.
_acc_task3, _acc_task2, _acc_task1 = _accuracy_slots

# Evaluate the functional model on the first task.
score_task1 = Final_model.evaluate(
    X_test_task1, [Y_test_task3, Y_test_task2, Y_test_task1], verbose=0)
print('Test score for first task:', score_task1[0])
print('Test accuracy for first task:', score_task1[_acc_task1])

# =============================================================================
# Evaluate the functional model on second task (new task1)
score_task2 = Final_model.evaluate(
    X_test_task2, [Y_test_task3, Y_test_task2, Y_test_task1], verbose=0)
print('Test score for new task1:', score_task2[0])
print('Test accuracy for new task1:', score_task2[_acc_task2])

# =============================================================================
# Evaluate the functional model on third task (new task2)
score_task3 = Final_model.evaluate(
    X_test_task3, [Y_test_task3, Y_test_task2, Y_test_task1], verbose=0)
print('Test score for new task2:', score_task3[0])
print('Test accuracy for new task2:', score_task3[_acc_task3])