#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Code for running the experiments of the paper presented below.
# If this code is used or adapted, please cite the paper:
'''
Zacarias, A., Alexandre, L.A., 
"SeNA-CNN: Overcoming Catastrophic Forgetting in Convolutional Neural 
Networks by Selective Network Augmentation", 
8th IAPR TC3 Workshop on Artificial Neural Networks in Pattern 
Recognition. Springer LNAI. Siena, Italy, September 19-21, 2018. 
'''
# bibtex:
'''
@InProceedings{zacarias2018a,
 author = {Zacarias, A. and Alexandre, {L.A.}},
 title = {SeNA-CNN: Overcoming Catastrophic Forgetting in Convolutional Neural Networks by Selective Network Augmentation},
 booktitle = {8th IAPR TC3 Workshop on Artificial Neural Networks in Pattern 
Recognition},
 year = {2018},
 publisher = {Springer},
}
'''
# 2018-06-30
# requirements: Tensorflow, keras, nvidia GPU
# License: 
'''
MIT License

Copyright (c) 2018 Abel Zacarias and Luís A. Alexandre

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''

from __future__ import absolute_import
from __future__ import print_function
from fuel import config
# Use this line only if you are using the datasets from kerosene. You can find the dataset at: https://github.com/dribnet/kerosene 
config.data_path = 'path/to/data/set'

import os
import keras
import itertools
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.models import Sequential , Model, load_model, model_from_json
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from matplotlib import pyplot as plt
from keras.layers import Input, Dense, merge
from keras.optimizers import SGD, Adadelta, Adagrad
from keras.utils import plot_model
import keras.backend as K

# Use Theano-style ('th') dimension ordering: images are channels-first,
# i.e. shaped (channels, rows, cols).
K.set_image_dim_ordering('th')

batch_size = 32
nb_epoch = 12

# NOTE(review): X_train, Y_train, X_test, Y_test and nb_classes are not defined
# anywhere in this script. They must be set to the data of the first (old)
# task before this section runs.

# ------------------------------------------------------------------------------
# FIRST TRAINING PROCEDURE: isolated learning of the first (old) task.
# The layer order below is relied upon later in the script, which copies
# weights by layer index (0, 2, 6, 8, 13, 16) -- do not reorder.
model1 = Sequential()
# Step 1 - Conv1. Channels-first 3x32x32 input, the same shape used for the
# functional model and the test arrays further down in this script.
model1.add(Conv2D(32, (3, 3), padding='same', input_shape=(3, 32, 32)))
# Step 2 - Activation 1
model1.add(Activation('relu'))
# Step 3 - Conv2
model1.add(Conv2D(32, (3, 3)))
# Step 4 - Activation 2
model1.add(Activation('relu'))
# Step 5 - Pooling 1
model1.add(MaxPooling2D(pool_size=(2, 2)))
# Step 6 - Dropout 1
model1.add(Dropout(0.25))
# Step 7 - Conv3
model1.add(Conv2D(64, (3, 3), padding='same'))
# Step 8 - Activation 3
model1.add(Activation('relu'))
# Step 9 - Conv4
model1.add(Conv2D(64, (3, 3)))
# Step 10 - Activation 4
model1.add(Activation('relu'))
# Step 11 - Pooling 2
model1.add(MaxPooling2D(pool_size=(2, 2)))
# Step 12 - Dropout 2
model1.add(Dropout(0.25))
# Step 13 - Flattening
model1.add(Flatten())
# Step 14 - Full connection 1
model1.add(Dense(512))
# Step 15 - Activation 5
model1.add(Activation('relu'))
# Step 16 - Dropout 3
model1.add(Dropout(0.5))
# Step 17 - Full connection 2 (one unit per class)
model1.add(Dense(nb_classes))
# Step 18 - Softmax
model1.add(Activation('softmax'))

# Compile the model with SGD (Nesterov momentum).
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model1.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# Part 2 - Fit on the training images; the test split doubles as validation data.
model1.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))
# Evaluate the model trained in isolation: score is [loss, accuracy].
score = model1.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
#==============================================================================
# SECOND TRAINING PROCEDURE FOR NEW TASKS

# Sequential model for new task 1. Its first two convolutional layers receive
# the weights learned by model1 and are frozen, so only the remaining layers
# are trained on the new task.
# NOTE(review): X_train, Y_train, X_test, Y_test and nb_classes must hold the
# data of new task 1 at this point; they are not defined in this script.
model2 = Sequential()
# Step 1 - Conv1. Channels-first 3x32x32 input, the same shape used for the
# functional model and the test arrays further down in this script.
model2.add(Conv2D(32, (3, 3), padding='same', input_shape=(3, 32, 32)))
# Freeze the first convolutional layer (index 0).
model2.layers[0].trainable=False
# Step 2 - Activation 1
model2.add(Activation('relu'))
# Step 3 - Conv2
model2.add(Conv2D(32, (3, 3)))
# Freeze the second convolutional layer. It sits at index 2 (index 1 is the
# activation); freezing index 0 again would leave this layer trainable.
model2.layers[2].trainable=False
# Step 4 - Activation 2
model2.add(Activation('relu'))
# Step 5 - Pooling 1
model2.add(MaxPooling2D(pool_size=(2, 2)))
# Step 6 - Dropout 1
model2.add(Dropout(0.25))
# Step 7 - Conv3
model2.add(Conv2D(64, (3, 3), padding='same'))
# Step 8 - Activation 3
model2.add(Activation('relu'))
# Step 9 - Conv4
model2.add(Conv2D(64, (3, 3)))
# Step 10 - Activation 4
model2.add(Activation('relu'))
# Step 11 - Pooling 2
model2.add(MaxPooling2D(pool_size=(2, 2)))
# Step 12 - Dropout 2
model2.add(Dropout(0.25))
# Step 13 - Flattening
model2.add(Flatten())
# Step 14 - Full connection 1
model2.add(Dense(512))
# Step 15 - Activation 5
model2.add(Activation('relu'))
# Step 16 - Dropout 3
model2.add(Dropout(0.5))
# Step 17 - Full connection 2 (one unit per class)
model2.add(Dense(nb_classes))
# Step 18 - Softmax
model2.add(Activation('softmax'))

## Copy the weights of the two frozen convolutional layers from model1 to model2
model2.layers[0].set_weights(model1.layers[0].get_weights())
model2.layers[2].set_weights(model1.layers[2].get_weights())
#==============================================================================
# Compile the model with SGD. Compiling after setting `trainable` makes the
# freeze take effect during training.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model2.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
#==============================================================================
model2.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))
# score is [loss, accuracy] on the test split.
score = model2.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
#==============================================================================
# THIRD TRAINING PROCEDURE FOR NEW TASK2

# Sequential model for new task 2. Its first two convolutional layers receive
# the weights learned by model1 and are frozen, so only the remaining layers
# are trained on the new task.
# NOTE(review): X_train, Y_train, X_test, Y_test and nb_classes must hold the
# data of new task 2 at this point; they are not defined in this script.
model3 = Sequential()
# Step 1 - Conv1. Channels-first 3x32x32 input, the same shape used for the
# functional model and the test arrays further down in this script.
model3.add(Conv2D(32, (3, 3), padding='same', input_shape=(3, 32, 32)))
# Freeze the first convolutional layer (index 0).
model3.layers[0].trainable=False
# Step 2 - Activation 1
model3.add(Activation('relu'))
# Step 3 - Conv2
model3.add(Conv2D(32, (3, 3)))
# Freeze the second convolutional layer. It sits at index 2 (index 1 is the
# activation); freezing index 0 again would leave this layer trainable.
model3.layers[2].trainable=False
# Step 4 - Activation 2
model3.add(Activation('relu'))
# Step 5 - Pooling 1
model3.add(MaxPooling2D(pool_size=(2, 2)))
# Step 6 - Dropout 1
model3.add(Dropout(0.25))
# Step 7 - Conv3
model3.add(Conv2D(64, (3, 3), padding='same'))
# Step 8 - Activation 3
model3.add(Activation('relu'))
# Step 9 - Conv4
model3.add(Conv2D(64, (3, 3)))
# Step 10 - Activation 4
model3.add(Activation('relu'))
# Step 11 - Pooling 2
model3.add(MaxPooling2D(pool_size=(2, 2)))
# Step 12 - Dropout 2
model3.add(Dropout(0.25))
# Step 13 - Flattening
model3.add(Flatten())
# Step 14 - Full connection 1
model3.add(Dense(512))
# Step 15 - Activation 5
model3.add(Activation('relu'))
# Step 16 - Dropout 3
model3.add(Dropout(0.5))
# Step 17 - Full connection 2 (one unit per class)
model3.add(Dense(nb_classes))
# Step 18 - Softmax
model3.add(Activation('softmax'))

## Copy the weights of the two frozen convolutional layers from model1 to model3
model3.layers[0].set_weights(model1.layers[0].get_weights())
model3.layers[2].set_weights(model1.layers[2].get_weights())
#==============================================================================
# Compile the model with SGD. Compiling after setting `trainable` makes the
# freeze take effect during training.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model3.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
#==============================================================================
model3.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))
# score is [loss, accuracy] on the test split.
score = model3.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
#==============================================================================
# FOURTH PROCEDURE, CREATING THE FUNCTIONAL MODEL FOR THE THREE TASKS
# One shared trunk (x1..x6: the first two convolutional layers plus pooling
# and dropout) feeds three parallel branches, one per task. Each branch
# repeats the upper part of the sequential architecture used above.
# Branch 1 (first / old task)
# Channels-first 3x32x32 input, matching the first convolution below.
inputs=Input(shape=(3, 32, 32))
x1=Conv2D(32, (3, 3), padding='same', input_shape=(3, 32, 32))(inputs)
x2=Activation('relu')(x1)
x3=Conv2D(32, (3, 3))(x2)
x4=Activation('relu')(x3)
x5=MaxPooling2D(pool_size=(2, 2))(x4)
# x6 is the output of the shared trunk; every branch starts from it.
x6=Dropout(0.25)(x5)

x7=Conv2D(64, (3, 3), padding='same')(x6)
x8=Activation('relu')(x7)
x9=Conv2D(64, (3, 3))(x8)
x10=Activation('relu')(x9)
x11=MaxPooling2D(pool_size=(2, 2))(x10)
x12=Dropout(0.25)(x11)

x13=Flatten()(x12)
x14=Dense(512)(x13)
x15=Activation('relu')(x14)
x16=Dropout(0.5)(x15)
x17=Dense(nb_classes)(x16)
x18=Activation('softmax')(x17)
##=============================================================================
# Connecting the branch for the first new task to the previously trained trunk
# Branch 2
x19=Conv2D(64, (3, 3), padding='same')(x6)
x20=Activation('relu')(x19)
x21=Conv2D(64, (3, 3))(x20)
x22=Activation('relu')(x21)
x23=MaxPooling2D(pool_size=(2, 2))(x22)
x24=Dropout(0.25)(x23)

x25=Flatten()(x24)
x26=Dense(512)(x25)
x27=Activation('relu')(x26)
x28=Dropout(0.5)(x27)
x29=Dense(nb_classes)(x28)
x30=Activation('softmax')(x29)
##==============================================================================
# Connecting the branch for the second new task to the previously trained trunk
# Branch 3
x31=Conv2D(64, (3, 3), padding='same')(x6)
x32=Activation('relu')(x31)
x33=Conv2D(64, (3, 3))(x32)
x34=Activation('relu')(x33)
x35=MaxPooling2D(pool_size=(2, 2))(x34)
x36=Dropout(0.25)(x35)

x37=Flatten()(x36)
x38=Dense(512)(x37)
x39=Activation('relu')(x38)
x40=Dropout(0.5)(x39)
x41=Dense(nb_classes)(x40)
x42=Activation('softmax')(x41)
##==============================================================================
# Output order is [branch 3, branch 2, branch 1]; the target lists passed to
# evaluate() later in the script follow this same order.
predictions=[x42, x30, x18]
Final_model=Model(inputs=inputs, outputs=predictions)
# Write a diagram of the three-branch architecture to disk.
plot_model(Final_model, to_file='Final_c.png')
#==============================================================================
# Transfer the trained weights of the three sequential models into the
# functional model. Every entry below is
#     (layer index in Final_model, layer index in the source model).
# In the sequential models the weighted layers sit at indices 0 and 2 (first
# two convolutions), 6 and 8 (last two convolutions), 13 and 16 (dense layers).
# NOTE(review): the Final_model indices follow the original authors'
# architecture; with a different architecture they must be re-derived
# (e.g. by inspecting Final_model.summary()).
_weight_transfers = (
    # First (old) task: shared trunk convolutions plus the whole first branch.
    (model1, ((1, 0), (3, 2), (9, 6), (15, 8), (30, 13), (39, 16))),
    # First new task -> second branch. The two trunk convolutions are skipped:
    # they were already filled from model1 and are shared by every branch.
    (model2, ((8, 6), (14, 8), (29, 13), (38, 16))),
    # Second new task -> third branch.
    (model3, ((7, 6), (13, 8), (28, 13), (37, 16))),
)
for _source_model, _index_pairs in _weight_transfers:
    for _final_idx, _source_idx in _index_pairs:
        _trained_weights = _source_model.layers[_source_idx].get_weights()
        Final_model.layers[_final_idx].set_weights(_trained_weights)
#=============================================================================
# Compile the complete functional model once all weights are in place.
Final_model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

# NOTE(review): size_of_all_task_data is a placeholder that is not defined in
# this script; it must be a sized object whose length is the total number of
# test samples over the three tasks.

# Test data for task 1 (old). Each array spans the union of the three tasks'
# test samples, with the entries of the other tasks left at zero.
X_test_task1=np.zeros((len(size_of_all_task_data), 3, 32, 32))
# Next step: replace the zeros with the corresponding test images of the first task.
Y_test_task1=np.zeros((len(size_of_all_task_data), nb_classes))
# Next step: replace the zeros with the corresponding target values.
#
# Test data for new task 1. Fill it in the same way as for task 1.
X_test_task2=np.zeros((len(size_of_all_task_data), 3, 32, 32))
Y_test_task2=np.zeros((len(size_of_all_task_data), nb_classes))
#
# Test data for new task 2. Fill it in the same way as for task 1.
X_test_task3=np.zeros((len(size_of_all_task_data), 3, 32, 32))
Y_test_task3=np.zeros((len(size_of_all_task_data), nb_classes))
#
# Final_model has three outputs ordered [branch 3, branch 2, branch 1], so the
# targets are passed in that same order. With a single 'accuracy' metric,
# evaluate() returns seven values:
#   [0] total loss
#   [1] branch 3 loss   [2] branch 2 loss   [3] branch 1 loss
#   [4] branch 3 acc    [5] branch 2 acc    [6] branch 1 acc
# Each task therefore reads the accuracy of its own branch: 6, 5 and 4.
#
# Evaluate the functional model on the first task.
score_task1 = Final_model.evaluate(X_test_task1, [Y_test_task3, Y_test_task2, Y_test_task1], verbose=0)
print('Test score for first task:', score_task1[0])
print('Test accuracy for first task:', score_task1[6])
#=============================================================================
# Evaluate the functional model on the second task (new task 1)
score_task2 = Final_model.evaluate(X_test_task2, [Y_test_task3, Y_test_task2, Y_test_task1], verbose=0)
print('Test score for new task1:', score_task2[0])
print('Test accuracy for new task1:', score_task2[5])
#=============================================================================
# Evaluate the functional model on the third task (new task 2)
score_task3 = Final_model.evaluate(X_test_task3, [Y_test_task3, Y_test_task2, Y_test_task1], verbose=0)
print('Test score for new task2:', score_task3[0])
print('Test accuracy for new task2:', score_task3[4])