############## Human Action Recogition - Paper, 2017 ##############

import cv2
import time
import theano
from sklearn import svm
import random
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cPickle
import sys
import itertools
from sklearn.metrics import accuracy_score
from keras.utils import np_utils, generic_utils
from keras import backend as K
from sklearn.cross_validation import train_test_split
from sklearn import cross_validation
from sklearn import preprocessing
from keras.models import Sequential, Model
from keras.layers.convolutional import Convolution3D,MaxPooling3D
from keras.layers.core import Dense, Dropout, Activation, Flatten,TimeDistributedDense
from keras.layers import Input, Merge
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

rs = 2  # shared random seed: reused for train_test_split and StratifiedShuffleSplit below

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix.

    Parameters
    ----------
    cm : 2-D array of counts (e.g. from sklearn.metrics.confusion_matrix).
    classes : sequence of class names used as the axis tick labels.
    normalize : if True, convert each row to fractions of its row total
        before plotting, so cells show per-class recall.
    title : figure title.
    cmap : matplotlib colormap for the image.
    """
    # BUG FIX: normalize FIRST so the image, the printed matrix and the
    # per-cell text all show the same values.  The original normalized
    # after imshow(), so the plotted image always showed raw counts even
    # when normalize=True.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Annotate each cell; use white text on dark cells for readability.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, round(cm[i, j], 2),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

def svc(X,y,testdata,testlabel,mylist1,rs):
    """
    Grid-search an RBF-kernel SVM on the CNN features, then evaluate it.

    Parameters
    ----------
    X, y : training features and one-hot training labels.
    testdata, testlabel : validation features and one-hot labels.
    mylist1 : class names (used in the report and confusion-matrix plots).
    rs : random seed for the cross-validation shuffler.

    Side effects: prints the chosen hyper-parameters, training accuracy,
    a classification report, and shows two confusion-matrix figures.
    """
    # Labels arrive one-hot encoded; convert back to integer class ids.
    y_vals = np_utils.probas_to_classes(y)
    y_vals_test = np_utils.probas_to_classes(testlabel)

    # Coarse log-spaced grid over the two RBF hyper-parameters.
    C_range = np.logspace(-2, 10, 13)
    gamma_range = np.logspace(-9, 3, 13)
    param_grid = dict(gamma=gamma_range, C=C_range)
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=rs)
    grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
    grid.fit(X, y_vals)
    best_C = grid.best_params_.get('C')
    best_gamma = grid.best_params_.get('gamma')
    # Single-argument print() behaves identically under Python 2 and 3.
    print(best_C)
    print(best_gamma)

    # Refit one SVM with the best hyper-parameters on all training data.
    clf2 = SVC(kernel='rbf', C=best_C, gamma=best_gamma, max_iter=3000,
               tol=0.000001, probability=True)
    clf2.fit(X, y_vals)
    # BUG FIX: the training accuracy used to be computed and silently
    # discarded; report it so the call is not dead code.
    print(clf2.score(X, y_vals))
    z = clf2.predict(testdata)

    print(classification_report(y_vals_test, z, target_names=mylist1))

    cnf_matrix = confusion_matrix(y_vals_test, z)
    class_names = mylist1
    # Plot non-normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix, without normalization')
    # Plot normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True, title='Normalized confusion matrix')
    plt.show()
     
# Deep Keras model pickles can exceed Python's default recursion limit,
# so raise it before the cPickle.load further below.
sys.setrecursionlimit(10000)

# Target clip geometry: every video is reduced to img_depth consecutive
# frames.  NOTE(review): cv2.resize takes dsize as (width, height), so
# resizing to (img_rows, img_cols) = (30, 50) yields arrays of numpy shape
# (50, 30) = (img_cols, img_rows); the tensors built later also use the
# (img_cols, img_rows) ordering, so the two conventions agree -- confirm.
img_rows= 30
img_cols = 50
img_depth = 15
# Training data
X_tr=[]           # variable to store spatial (grayscale frame) dataset
X_dense = []      # variable to store dense optical-flow dataset

# Each sub-directory of kth_dataset is one action class.  NOTE(review):
# os.listdir order is filesystem-dependent, and the hard-coded label
# slices assigned after this loop assume a fixed class ordering -- verify.
dataset_path = 'kth_dataset'
mylist = os.listdir(dataset_path)

zz = 0            # running count of processed videos (diagnostic only)

print "Extracting frames from videos and building the input for the Deep neural network ...."

for myaction in mylist:
   actionpath = dataset_path + '/' + myaction
   listing = os.listdir(actionpath)

   for vid in listing:
      vid = dataset_path + '/' + myaction + '/' +  vid
      frames = []       # grayscale frames for the spatial stream
      densef = []       # optical-flow angle maps for the temporal stream
      cap = cv2.VideoCapture(vid)
      ret, frame1 = cap.read()
      fps = cap.get(5)  # property id 5 == CAP_PROP_FPS; value is never used
      frame1 =cv2.resize(frame1,(img_rows,img_cols),interpolation=cv2.INTER_AREA)
      prvs = cv2.cvtColor(frame1,cv2.COLOR_BGR2GRAY)
      zz = zz+1

      # Read the next img_depth frames; for each, keep the grayscale image
      # and the Farneback optical flow against the previous frame.
      for k in xrange(img_depth):
        ret, frame = cap.read()
        frame=cv2.resize(frame,(img_rows,img_cols),interpolation=cv2.INTER_AREA)   
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        flow = cv2.calcOpticalFlowFarneback(prvs,gray, 0.5, 3, 15, 3, 5, 1.2, 0)

        # Keep only the flow direction, rescaled from radians to half
        # degrees in [0, 180) (the usual hue encoding for flow display).
        mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
        flow[...,0] = ang*180/np.pi/2
        fff = flow[...,0]
        frames.append(gray)
        densef.append(fff)
        prvs = gray        
        # Leftover from an interactive display loop; no window is shown.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
      cap.release()
      cv2.destroyAllWindows()
      # Move the time axis last: (depth, cols, rows) -> (cols, rows, depth),
      # so each sample becomes a (50, 30, 15) volume.
      input=np.array(frames)
      inputflow = np.array(densef)
      ipt=np.rollaxis(np.rollaxis(input,2,0),2,0)
      iptflow = np.rollaxis(np.rollaxis(inputflow,2,0),2,0)
      X_tr.append(ipt)    
      X_dense.append(iptflow) 
# Stack the per-video volumes into (num_videos, 50, 30, 15) arrays.
X_tr_array = np.array(X_tr)
X_dense_array = np.array(X_dense)
print X_tr_array.shape
print X_dense_array.shape

num_samples = len(X_dense_array)

# Hard-coded per-class sample counts, in directory order.
# NOTE(review): class 1 gets only 99 samples (100:199) while every other
# class gets 100; KTH is known to be missing one clip, but confirm these
# slice boundaries match the actual on-disk video counts and ordering.
label=np.ones((num_samples,),dtype = int)
label[0:100]= 0
label[100:199] = 1
label[199:299] = 2
label[299:399] = 3
label[399:499]= 4
label[499:] = 5

train_data = [X_tr_array,label]       # spatial stream + labels
train_data2 = [X_dense_array,label]   # optical-flow stream + labels

(X_train, y_train) = (train_data[0],train_data[1])
print('X_Train shape:', X_train.shape)

(X_train2, y_train2) = (train_data2[0],train_data2[1])
print('X_Dense shape:', X_train2.shape)

# Add a singleton channel axis: (N, 50, 30, 15) -> (N, 1, 50, 30, 15),
# the channels-first layout the Convolution3D branches below expect.
train_set = np.zeros((num_samples, 1, img_cols,img_rows,img_depth))
train_set2 = np.zeros((num_samples, 1, img_cols,img_rows,img_depth))
print train_set.shape

for h in xrange(num_samples):
    train_set[h][0][:][:][:]=X_train[h,:,:,:]
    train_set2[h][0][:][:][:]=X_train2[h,:,:,:]

print(train_set.shape, 'Train samples') 

patch_size = 15    # img_depth or number of frames used for each video

print(train_set.shape, 'train samples')

# CNN Training parameters

batchsize0 = 2     # batch size for the feature-extraction predict() calls
nb_classes = 6     # six action classes
nb_epoch = 60      # NOTE(review): unused in this script -- no fit() call follows

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
print train_set.shape

# Stack the two streams along axis 1: result is (N, 2, 50, 30, 15) with
# channel 0 = spatial stream and channel 1 = optical-flow stream.
fullset = np.hstack((train_set,train_set2))

print fullset.shape

# level of pooling to perform at each layer (POOL x POOL)
nb_pool = [2,2,2]

# Pre-processing

fullset = fullset.astype('float32')

fullset -= np.mean(fullset)

fullset /=np.max(fullset)

X_train_new, X_val_new, y_train_new,y_val_new =  train_test_split(fullset, Y_train, test_size=0.3, random_state=rs)

print X_train_new.shape
num_samples2 = len(X_train_new)

X_trainSpatial = np.zeros((num_samples2, 1, img_cols,img_rows,img_depth))
for h in xrange(num_samples2):
    X_trainSpatial[h][0][:][:][:]=X_train_new[h,0,:,:,:]

print 'X_trainSpatial is ',X_trainSpatial.shape

X_trainDense = np.zeros((num_samples2, 1, img_cols,img_rows,img_depth))
for h in xrange(num_samples2):
    X_trainDense[h][0][:][:][:]=X_train_new[h,0,:,:,:]

print 'X_trainDense is ',X_trainDense.shape

num_samples2 = len(X_val_new)

X_valSpatial = np.zeros((num_samples2, 1,  img_cols,img_rows, img_depth))
for h in xrange(num_samples2):
    X_valSpatial[h][0][:][:][:]=X_val_new[h,0,:,:,:]

print 'X_valSpatial is ',X_valSpatial.shape

X_valDense = np.zeros((num_samples2, 1,  img_cols,img_rows, img_depth))
for h in xrange(num_samples2):
    X_valDense[h][0][:][:][:]=X_val_new[h,0,:,:,:]

print 'X_valDense is ',X_valDense.shape

# Load the previously trained two-branch 3D-CNN.
# NOTE(review): cPickle.load can execute arbitrary code embedded in the
# pickle -- only load model01.pkl from a trusted source.
model = cPickle.load(open("model01.pkl","rb"))
# Assumes the pickled model is a Sequential whose first layer is a Merge
# of two Sequential branches (Keras 1.x layout) -- TODO confirm.
# Per-branch weight list layout: [0:2] conv1 W+b, [2:4] conv2 W+b,
# [4:6] dense W+b; these are transplanted into the rebuilt branches below.
weights1 = model.layers[0].layers[0].get_weights()
weights2 = model.layers[0].layers[1].get_weights()

# Left branch: spatial (grayscale) stream, rebuilt with the trained
# weights injected layer by layer.
left_branch2 = Sequential()
left_branch2.add(Convolution3D(32, kernel_dim1=3, kernel_dim2=3, kernel_dim3=3, input_shape=(1,50,30,img_depth), border_mode='valid' , activation='relu', 
weights=weights1[0:2]))
left_branch2.add(MaxPooling3D(pool_size=(nb_pool[0], nb_pool[1], nb_pool[2]) , strides=(2, 2, 1)))
left_branch2.add(Convolution3D(64, 3, 3,3, activation='relu', border_mode='same' , weights=weights1[2:4]))
left_branch2.add(MaxPooling3D(pool_size=(nb_pool[0], nb_pool[1], nb_pool[2])))

left_branch2.add(Dropout(0.5))
left_branch2.add(Flatten())
left_branch2.add(Dense(150, init='normal', activation='relu' , weights=weights1[4:6]))
left_branch2.add(Dropout(0.5))


# Right branch: dense optical-flow stream, identical architecture with the
# second branch's trained weights.
right_branch2 = Sequential()
right_branch2.add(Convolution3D(32, kernel_dim1=3, kernel_dim2=3, kernel_dim3=3, input_shape=(1,50,30,img_depth), border_mode='valid', activation='relu',
weights=weights2[0:2]))
right_branch2.add(MaxPooling3D(pool_size=(nb_pool[0], nb_pool[1], nb_pool[2]) , strides=(2, 2, 1)))
right_branch2.add(Convolution3D(64, 3, 3,3, activation='relu', border_mode='same', weights=weights2[2:4]))
right_branch2.add(MaxPooling3D(pool_size=(nb_pool[0], nb_pool[1], nb_pool[2])))

right_branch2.add(Dropout(0.5))
right_branch2.add(Flatten())
right_branch2.add(Dense(150, init='normal', activation='relu' , weights=weights2[4:6]))
right_branch2.add(Dropout(0.5))

# Concatenate the two 150-d branch outputs into one 300-d feature vector.
# modelx has no classification head and is never trained in this script;
# it is used purely as a fixed feature extractor via predict().
merged2 = Merge([left_branch2, right_branch2], mode='concat')
modelx = Sequential()
modelx.add(merged2)

# NOTE(review): loss/optimizer appear unused since no fit() follows --
# compile() seems needed only to make the model callable here; confirm.
modelx.compile(loss='categorical_crossentropy', optimizer='RMSprop', metrics=['accuracy'])

# Extract deep features for train and validation sets, L2-normalised per
# sample before feeding the SVM.
layer_output_train = modelx.predict([X_trainSpatial, X_trainDense], batch_size=batchsize0)
layer_output_train = preprocessing.normalize(layer_output_train, norm='l2')
min_max_scaler = preprocessing.MinMaxScaler()  # NOTE(review): created but never used

layer_output_test = modelx.predict([X_valSpatial, X_valDense], batch_size=batchsize0)
layer_output_test = preprocessing.normalize(layer_output_test, norm='l2')

# Train and evaluate the RBF SVM on the extracted features.
svc(layer_output_train,y_train_new,layer_output_test,y_val_new,mylist,rs)



