##### demo_muller_brown_GP_sNEB_AIE.py
##### Copyright: Olli-Pekka Koistinen, Aalto University, 12.2.2020
#####
##### This script shows how to use 'GP_sNEB_AIE.py' in a Muller-Brown example.

import numpy as np
import matplotlib.pyplot as plt
import utils
import GP_sNEB_AIE
import muller_brown
import GPy
import paramz
import pdb

# Potential energy function; it is called below with a point and its
# return value is unpacked into (energy, gradient).
pot_general = muller_brown.muller_brown

# The two minimum points, i.e., the end points of the path (one row each).
min1 = np.array([[-0.5582, 1.4417]])
min2 = np.array([[0.6235, 0.0280]])

# Dimensionality of the coordinate space.
D = min1.shape[1]

# Energy and gradient at minimum point 1 and at minimum point 2.
(E_min1, G_min1), (E_min2, G_min2) = pot_general(min1), pot_general(min2)

# Zero level of energy (scalar): here the energy of minimum point 1.
# Alternative choice, the lower one of the two minima:
# Elevel = np.min((E_min1,E_min2))
Elevel = E_min1

# Shift both end-point energies to the zero level. The names are rebound
# (no in-place subtraction), so 'Elevel' keeps the unshifted value.
E_min1, E_min2 = E_min1 - Elevel, E_min2 - Elevel

# Number of images on the path.
N_im = 10

# Initial path, built by 'utils.initialize_path_linear' from the two
# minimum points and the number of images.
R_init = utils.initialize_path_linear(min1, min2, N_im)

# Step method used to move the images (here the quick-min velocity Verlet
# function from 'utils'), and its parameters (the time step in case of qmVV).
method_step = utils.step_QMVelocityVerlet
param_step = 0.01

# Spring constants: parallel and perpendicular component.
k_par = 1.0
k_perp = 1.0

# ----- Final convergence thresholds (accurate NEB forces) -----

# 'T_MEP': final convergence threshold for 'maxnormF', i.e., for the maximum
# of the accurate norms of the NEB forces acting on the 'N_im'-2 intermediate images.
# The algorithm is stopped when the accurate NEB force is below 'T_MEP' for all images.
T_MEP = 0.01

# 'T_CI': additional final convergence threshold for the climbing image,
# applied if the climbing image option is used.
# For no tighter threshold on the climbing image, set 'T_CI' equal to 'T_MEP'
# (or larger, because the general threshold 'T_MEP' concerns also the climbing image).
T_CI = 0.01

# ----- Thresholds used during the relaxation phases (GP approximation) -----

# 'T_CIon_gp': preliminary convergence threshold for each relaxation phase.
# The climbing image mode is turned on when the approximated 'maxnormF'
# is below 'T_CIon_gp'.
# Set 'T_CIon_gp' to zero if the climbing image should not be used at all.
T_CIon_gp = 0.01

# 'divisor_T_MEP_gp': selects the convergence threshold of each relaxation phase
# for the approximated 'maxnormF' on the approximated energy surface.
# - Zero: the default threshold is used, which is 1/10 of the lowest final threshold.
# - Positive: the GP convergence threshold is 1/'divisor_T_MEP_gp' of the smallest
#   accurate norm of NEB force obtained so far on any of the 'N_im'-2 intermediate
#   images, but not less than 1/10 of the lowest final threshold.
#   This saves inner iterations during the first relaxation phases: if the
#   approximation error is assumed to not decrease more than that during one outer
#   iteration, there is no need for more accurate relaxation on an approximated surface.
divisor_T_MEP_gp = 10.0

# 'disp_max': maximum displacement of an image from the nearest evaluated image,
# relative to the length of the initial path.
# If, for any image, the distance to the nearest evaluated image is larger than
# 'disp_max' times the length of the initial path, the last inner step is rejected
# and the relaxation phase is stopped.
disp_max = 0.5

# ----- Iteration limits and starting point of the relaxation phases -----

# 'num_bigiter_init': number of outer iterations started from the initial path 'R_init'.
# - Until 'num_bigiter_init' is reached, each relaxation phase is started from 'R_init'.
#     (With climbing image, the CI phase is continued from the "preliminarily converged"
#      equally spaced path.)
# - After that, each relaxation phase is started from the latest converged path.
#     (With climbing image, each relaxation phase is started from the latest
#      "preliminarily converged" equally spaced path, and the CI phase is started from
#      the latest converged CI-path if CI is unchanged, otherwise from the current
#      "preliminarily converged" evenly spaced path.)
# Trade-off: starting each round from the initial path may improve stability (and
# decrease outer iterations), whereas starting from the latest path may decrease the
# amount of inner iterations during the relaxation phases.
# With an infinite value the limit is never reached.
num_bigiter_init = np.inf

# Maximum number of outer iterations (new sets of observations).
num_bigiter = 300
# Maximum number of inner iterations (steps during a relaxation phase).
num_iter = 5

# ----- "Virtual Hessian" -----

# 'num_bigiter_hess': number of outer iterations using the "virtual Hessian",
# i.e., additional observations around the minimum points.
# These observations may slow down the GP computations, especially in
# high-dimensional cases, but they may give useful information in the beginning.
# They usually don't bring gain after 4 outer iterations (but in some cases do).
# Zero sets the "virtual Hessian" off.
num_bigiter_hess = 0
# Distance of the additional points from the minimum points.
eps_hess = 0.001

# Define here the GP model by choosing the covariance function family and the
# observation model (likelihood). The hyperparameters of the GP model will
# be optimized before each relaxation phase, unless they are fixed with
# 'constrain_fixed' (like here for the variance of 'ker_const' and the
# observation noise variance of 'lik'). If the prior is left out,
# an uninformative default prior is used.

# All kernels below take their input dimension from 'D' (the dimensionality
# of the coordinate space), so the model definition follows the minimum
# points instead of being hard-coded for two dimensions.

# 'ker_const' defines a constant covariance function, which has a
# similar effect as integration over an unknown constant mean function
# having a Gaussian prior distribution with variance 'variance'.
# Thus, adding this to the covariance function gives the mean level of the GP
# more space to vary. The variance is fixed to its initial value.
ker_const = GPy.kern.Bias(input_dim=D, variance=100.0)
ker_const.variance.constrain_fixed(value=100.0,warning=True,trigger_parent=True)

# 'ker_sexp' defines a squared exponential covariance function which favours
# smooth functions. The hyperparameter 'variance' controls the magnitude
# of the overall variation of the function, and 'lengthscale' defines how fast the function
# can change. 'lengthscale' can be the same for all dimensions (ARD=False)
# or there can be an independent value for each dimension (ARD=True).
# In this 2D example, independent values are reasonable,
# but in high-dimensional cases a shared 'lengthscale' may be better identifiable.
# Alternative with an independent 'lengthscale' for each dimension:
#ker_sexp = GPy.kern.RBF(input_dim=D, variance=0.1, lengthscale=1.0, ARD=True)
ker_sexp = GPy.kern.RBF(input_dim=D, variance=0.1, lengthscale=1.0, ARD=False)

# Student-t prior for the lengthscale, created with the arguments (0.0, 1.0, 4.0).
t_prior = GPy.priors.StudentT(0.0,1.0,4.0)
# The domain of the prior is overridden to positive before it is attached.
# NOTE(review): presumably this lets 'set_prior' accept the prior for the
# positive 'lengthscale' without changing its constraint - confirm against GPy.
t_prior.domain = '_POSITIVE'
ker_sexp.lengthscale.set_prior(t_prior)

# Covariance function of the energy: constant + squared exponential.
ker = ker_const + ker_sexp

# One kernel for the energy followed by one derivative kernel for each of
# the D coordinates (D+1 entries, matching 'likelihood_list' below).
kernel_list = [ker] + [GPy.kern.DiffKern(ker, dim) for dim in range(D)]

# 'lik' defines a Gaussian noise model. Here, we assume that the observations
# are accurate, so 'variance' should be set small. However, if it is set too
# small, numerical issues may arise. The noise variance is fixed to its initial value.
lik = GPy.likelihoods.Gaussian(variance=1e-8)
lik.variance.constrain_fixed(value=1e-8,warning=True,trigger_parent=True)
# The same likelihood object is repeated D+1 times in the list.
likelihood_list = [lik]*(D+1)

# Optimizer for the GPy hyperparameter optimization.
# 'opt_SCG' may be more stable, but slower than 'opt_lbfgsb'.
# Alternative:
#opt = paramz.optimization.optimization.opt_lbfgsb(bfgs_factor=10000000.0,gtol=1.0e-4)
scg_options = {
    "max_iters": 1000,
    "xtol": 1e-4,
    "ftol": 1e-4,
    "gtol": 1e-4,
}
opt = paramz.optimization.optimization.opt_SCG(**scg_options)

# 'visualize': 1 = visualize the true energy along the path after each
# relaxation phase, 0 = no visualization.
# The visualizations require a large amount of extra evaluations, so this
# option is not meant to be used in real applications.
visualize = 1

# Call the GP-sNEB algorithm with the settings defined above.
# The 17 returned values are unpacked in the order listed here.
(
    R, E_R, G_R, i_CI, gp,
    R_all, E_all, G_all, obs_at,
    E_R_acc, E_R_gp,
    normF_R_acc, normF_R_gp,
    normFCI_acc, normFCI_gp,
    param_gp, figs,
) = GP_sNEB_AIE.GP_sNEB_AIE(
    pot_general=pot_general,
    R_init=R_init,
    method_step=method_step,
    kernel_list=kernel_list,
    likelihood_list=likelihood_list,
    opt=opt,
    param_step=param_step,
    k_par=k_par,
    k_perp=k_perp,
    T_MEP=T_MEP,
    T_CI=T_CI,
    T_CIon_gp=T_CIon_gp,
    divisor_T_MEP_gp=divisor_T_MEP_gp,
    disp_max=disp_max,
    num_bigiter_init=num_bigiter_init,
    num_bigiter=num_bigiter,
    num_iter=num_iter,
    num_bigiter_hess=num_bigiter_hess,
    eps_hess=eps_hess,
    visualize=visualize,
)

# Plot the behaviour: one figure with two panels side by side.
fig = plt.figure()

# Left panel: NEB force magnitudes. Lines show the '_gp' arrays against a
# running index 1..(number of columns); circles show the '_acc' arrays at
# the positions given by 'obs_at'. Max and mean are taken along axis 0
# (presumably the image axis - confirm against GP_sNEB_AIE).
sub1 = fig.add_subplot(1, 2, 1)
sub1.set_title('Magnitude of the NEB force on one image (GP approximation)')
iters_normF = range(1, normF_R_gp.shape[1] + 1)
sub1.plot(iters_normF, np.max(normF_R_gp, axis=0), label='Max', color='r')
sub1.plot(iters_normF, np.mean(normF_R_gp, axis=0), label='Mean', color='b')
sub1.plot(iters_normF, normFCI_gp, label='CI', color='g')
sub1.plot(obs_at, np.max(normF_R_acc, axis=0), 'ro')
sub1.plot(obs_at, np.mean(normF_R_acc, axis=0), 'bo')
sub1.plot(obs_at, normFCI_acc, 'go')
sub1.set_xlabel('iteration')
sub1.legend()

# Right panel: mean energy along axis 0, line for 'E_R_gp' and circles
# for 'E_R_acc' at 'obs_at'.
sub2 = fig.add_subplot(1, 2, 2)
sub2.set_title('Mean energy over the images (GP approximation)')
iters_E = range(1, E_R_gp.shape[1] + 1)
sub2.plot(iters_E, np.mean(E_R_gp, axis=0), color='b')
sub2.plot(obs_at, np.mean(E_R_acc, axis=0), 'bo')
sub2.set_xlabel('iteration')

plt.show()

