##### atomic_GP_dimer.py
##### Copyright: Olli-Pekka Koistinen, Aalto University, 9.7.2020
#####
##### This function uses the atomic GP-dimer method to converge to a saddle point,
##### starting from somewhere inside the convergence area.
#####
##### The relaxation of the dimer on the approximated energy surface
##### is done according to a dimer method, where a rotation step rotates
##### the dimer (a pair of images) towards its minimum energy orientation
##### to find the lowest curvature mode of the potential energy and
##### translation step moves the dimer towards the saddle point by
##### inverting the force component in the direction of the dimer.
##### After each relaxation phase, the energy and gradient are acquired at
##### the middle point of the dimer, and the GP hyperparameters are reoptimized.
#####
##### The atomic version of GP-dimer uses a special GPy covariance function 'RBF_atomic' implemented in 'rbf_atomic.py',
##### where the distance between configurations C and C' is based on the changes of the inter-atomic distances.
##### To use that kernel, GPy should be installed from:
##### https://github.com/esiivola/GPy/tree/feature-multioutput-grad-obs
##### The file 'rbf_atomic.py' should be then added to the folder 'GPy/GPy/kern/src/'
##### and the following line added to 'GPy/GPy/kern/__init__.py':
##### from .src.rbf_atomic import RBF_atomic
##### In addition, the files 'add.py' and 'static.py' should be updated in 'GPy/GPy/kern/src/'.
#####
##### Input:
#####   pot_general            accurate potential and gradient function
#####                            (takes 'N_im' images as ndarray of shape 'N_im' x 'D',
#####                             and returns the potential energy at those images as ndarray of shape 'N_im' x 1
#####                             and the gradient of the potential energy as ndarray of shape 'N_im' x 'D')
#####   conf_info              dictionary including information about the configurations necessary for the GP model
#####                           - conf_info['conf_fro']: coordinates of active frozen atoms (ndarray of shape 'N_fro' x 3)
#####                           - conf_info['atomtype_mov']: atomtype indices for moving atoms (ndarray of shape 'N_mov')
#####                           - conf_info['atomtype_fro']: atomtype indices for active frozen atoms (ndarray of shape 'N_fro')
#####                           - conf_info['pairtype']: pairtype indices for pairs of atomtypes (ndarray of shape 'n_at' x 'n_at')
#####                           - conf_info['n_pt']: number of active pairtypes
#####   conf_info_inactive     dictionary including information about inactive frozen atoms
#####                           - conf_info_inactive['conf_ifro']: coordinates of inactive frozen atoms (ndarray of shape 'N_ifro' x 3)
#####                           - conf_info_inactive['atomtype_ifro']: atomtype indices for inactive frozen atoms (ndarray of shape 'N_ifro')
#####   actdist_fro            activation distance for moving+frozen atom pairs (inf if all active)
#####   R_init                 coordinates of the middle point of the initial dimer (ndarray of shape 1 x 'D')
#####   orient_init            unit vector along the direction of the initial dimer (ndarray of shape 1 x 'D')
#####   method_rot             a function defining the rotation step (see, e.g., 'utils_dimer.rot_iter_lbfgsext')
#####   method_trans           a function defining the translation step (see, e.g., 'utils_dimer.trans_iter_lbfgs')
#####   param_trans            parameters of the translation method (shape depends on 'method_trans')
#####   E_init                 energy at the middle point of the initial dimer (ndarray of shape 1 x 1) [default empty] (if not empty, 'R_init' should be included in 'R_all_init')
#####   G_init                 gradient at the middle point of the initial dimer (ndarray of shape 1 x 'D') [default empty] (if not empty, 'R_init' should be included in 'R_all_init')
#####   R_all_init             coordinates of the initial data points (ndarray of shape 'N_obs' x 'D') [default empty]
#####   E_all_init             energies at the initial data points (ndarray of shape 'N_obs' x 1) [default empty]
#####   G_all_init             gradients at the initial data points (ndarray of shape 'N_obs' x 'D') [default empty]
#####   dimer_sep              dimer separation (distance from the middle point of the dimer to the two images) [default 0.01]
#####   eval_image1            indicator if image 1 of the dimer is evaluated (1) or not (0) after each relaxation phase
#####                            in addition to the middle point of the dimer [default 0]
#####   T_dimer                final convergence threshold for 'maxF_R', which is the maximum component of the force acting on the middle point of the dimer
#####                            (i.e., the algorithm is stopped when all components of the accurate force are below 'T_dimer') [default 0.01]
#####   initrot_nogp           indicator if the initial rotations are performed without GP (1) or with GP (0) [default 0]
#####   T_anglerot_init        convergence threshold for rotation angle in the initial rotations
#####                            (the dimer is not rotated when the estimated rotation angle is less than this) [default 0.0873]
#####   num_iter_initrot       maximum number of initial rotations (0 if initial rotations skipped) [default 10]
#####   inittrans_nogp         indicator if an initial test translation step is taken without GP (1)
#####                            or if GP is used right after initial rotations (0) [default 0]
#####   T_anglerot_gp          convergence threshold for rotation angle during a relaxation phase [default 0.01]
#####   num_iter_rot_gp        maximum number of rotation iterations per translation during a relaxation phase [default 10]
#####   divisor_T_dimer_gp     if this option is set on (> 0), the convergence threshold for a relaxation phase is 1/'divisor_T_dimer_gp'
#####                            of the smallest accurate 'maxF_R' obtained so far, but not less than 1/10 of 'T_dimer'
#####                            (otherwise the GP convergence threshold is always 1/10 of 'T_dimer') [default 10.0]
#####   disp_max               maximum displacement of the middle point of the dimer from the nearest observed data point
#####                            (the relaxation phase is stopped if 'disp_max' is reached) [default 0.5]
#####   ratio_at_limit         limit for the ratio (< 1) of inter-atomic distances between image and its "nearest" observed data point
#####                            (the relaxation phase is stopped if 'ratio_at_limit' is reached for any image) [default 2.0/3.0]
#####   num_bigiter_initloc    number of outer iterations started from the initial location 'R_init'
#####                            (after that, each relaxation phase is started from the latest converged dimer) [default np.inf]
#####   num_bigiter_initparam  number of outer iterations where the hyperparameter optimization is started
#####                            from values initialized based on the range of the current data
#####                            (after that, the optimization is started from the values of the previous round) [default np.inf]
#####   num_bigiter            maximum number of outer iterations (new pairs of observations) [default 300]
#####   num_iter               maximum number of inner iterations (steps during a relaxation phase) [default 10000]
#####   islarge_num_iter       indicator if 'num_iter' is assumed to be much larger than required for dimer convergence on accurate energy surface
#####                            (if not, the next relaxation phase is continued from the current path if 'num_iter' is reached) [default 1]
#####   load_file              path to the data file required to continue from a cancelled run ('' if started normally from the beginning) [default '']
#####   save_file              path to the data file where data is saved ('' if not saved) [default '']
#####
##### Output:
#####   R                      coordinates of the middle point of the final dimer (ndarray of shape 1 x 'D')
#####   orient                 unit vector along the direction of the final dimer (ndarray of shape 1 x 'D')
#####   E_R                    energy at the middle point of the final dimer
#####   G_R                    gradient at the middle point of the final dimer (ndarray of shape 1 x 'D')
#####   gp_model               the final GP model
#####   R_all                  coordinates of all observation points (ndarray of shape 'N_obs' x 'D')
#####   E_all                  energies for all observation points (ndarray of shape 'N_obs' x 1)
#####   G_all                  gradients for all observation points (ndarray of shape 'N_obs' x 'D')
#####   obs_at                 total numbers of inner iterations before new observations were taken
#####   E_R_acc                accurate energy of the middle point of the dimer for each outer iteration
#####   E_R_gp                 approximated energy of the middle point of the dimer for each inner iteration
#####   maxF_R_acc             accurate maximum component of force acting on the middle point of the dimer for each outer iteration
#####   maxF_R_gp              approximated maximum component of force acting on the middle point of the dimer for each inner iteration
#####   param_gp_initrot       optimized GP hyperparameters for each outer iteration during initial rotations
#####   param_gp               optimized GP hyperparameters for each outer iteration
#####   obs_initrot            number of observations required for initial rotations
#####   obs_total              number of total observations
#####   num_esmax              number of outer iterations stopped by maximum inner iteration limit
#####   num_es1                number of outer iterations stopped by inter-atomic stopping criterion
#####   num_es2                number of outer iterations stopped by raw distance stopping criterion

import numpy as np
from scipy.stats import norm
import utils
import utils_atomic
import utils_dimer
import GPy
import paramz
import pdb

def GP_dimer(pot_general,conf_info,conf_info_inactive,actdist_fro,R_init,orient_init,method_rot,method_trans,param_trans, \
             E_init,G_init,R_all_init,E_all_init,G_all_init,dimer_sep,eval_image1,T_dimer,initrot_nogp,T_anglerot_init, \
             num_iter_initrot,inittrans_nogp,T_anglerot_gp,num_iter_rot_gp,divisor_T_dimer_gp,disp_max,ratio_at_limit \
             num_bigiter_initloc,num_bigiter_initparam,num_bigiter,num_iter,islarge_num_iter,load_file,save_file):
         
    if not load_file:
         
        ###     
        ### THIS INFORMATION IS ASSUMED TO BE KNOWN BEFORE BEGINNING
        ###

        N_obs_init = E_all_init.shape[0]
        # dimension of the space (scalar):
        D = R_init.shape[1]
        # if 'orient_init' is empty, draw random unit vector:
        if orient_init.shape[0] < 1:
            orient_init = np.random.normal(size=(1,D))
        orient_init = orient_init/np.sqrt(np.sum(np.square(orient_init)))
    
        ###
        ### THE ALGORITHM BEGINS HERE
        ###

        obs_initrot = 0
        obs_total = 0
        num_esmax = 0
        num_es1 = 0
        num_es2 = 0
        rotinfo = {}
        transinfo = {}

        # coordinates of the middle point of the dimer:
        R = R_init.copy()
        # unit vector along the direction of the dimer:
        orient = orient_init.copy()      
        if E_init.shape[0] < 1:
            # energy and gradient at the middle point of the dimer:
            E_R, G_R = pot_general(R)
            if E_R.ndim < 2:
                print('ERROR: Modify your energy function so that it returns two-dimensional ndarrays (of shape ''N_im'' x 1 and ''N_im'' x ''D''), even if there is only one image in the input (''N_im'' = 1)!')
                return
            # set zero level of biased potential to the energy of the middle point of the initial dimer:
            Elevel = E_R[0,0]
            # define biased potential with zero level at 'Elevel':
            pot_biased = lambda R : utils_dimer.subtract_Elevel(pot_general,R,Elevel)
            E_R = E_R - Elevel
            # coordinates of all observation points:
            R_all = np.vstack((R_all_init,R))
            # energy for all observation points:
            E_all = np.vstack((E_all_init-Elevel,E_R))
            # gradient for all observation points:
            G_all = np.vstack((G_all_init,G_R))
            print('Evaluated the middle point of the initial dimer.\n')
        else:
            # set zero level of biased potential to the energy of the middle point of the initial dimer:
            Elevel = E_init[0,0]
            # define biased potential with zero level at 'Elevel':
            pot_biased = lambda R : utils_dimer.subtract_Elevel(pot_general,R,Elevel)
            E_R = E_init - Elevel
            G_R = G_init.copy()
            # coordinates of all observation points:
            R_all = R_all_init.copy()
            # energy for all observation points:
            E_all = E_all_init-Elevel
            # gradient for all observation points:
            G_all = G_all_init.copy()
        R1 = np.ndarray(shape=(0,D))
        E1 = np.ndarray(shape=(0,1))
        G1 = np.ndarray(shape=(0,D))

        # vector gathering accurate energy of the middle point of the dimer for each outer iteration:
        E_R_acc = E_R[0,:]
        # vector gathering accurate maximum component of the force acting on the middle point of the dimer for each outer iteration:
        maxF_R_acc = np.max(np.abs(G_R),1)
        # vector gathering the total numbers of inner iterations before new observations were taken:
        obs_at = np.array([0])
        print('Accurate values in the beginning: E_R = {:g}, maxF_R = {:g} \n\n'.format(E_R_acc[-1],maxF_R_acc[-1]))

        gp_model = None
        # optimized GP hyperparameters for each outer iteration during initial rotations:
        param_gp_initrot = None
        # vector gathering approximated energy of the middle point of the dimer for each inner iteration: 
        E_R_gp = np.ndarray(shape=(0))
        # vector gathering approximated maximum component of the force acting on the middle point of the dimer for each inner iteration:
        maxF_R_gp = np.ndarray(shape=(0))
        # optimized GP hyperparameters for each relaxation phase:
        param_gp = None

        # stop the algorithm if final convergence is obtained:
        if maxF_R_acc[-1] < T_dimer:
            fprintf('Final convergence obtained in the beginning ({:g} image evaluations).\n'.format(E_all.shape[0]-N_obs_init))
            return
    
        # evaluate image 1 of the dimer if option 'eval_image1' is set on:
        if eval_image1 > 0:
            R1 = R + dimer_sep*orient
            E1, G1 = pot_biased(R1)
            if E1.ndim < 2:
                print('ERROR: Modify your energy function so that it returns two-dimensional ndarrays (of shape ''N_im'' x 1 and ''N_im'' x ''D''), even if there is only one image in the input (''N_im'' = 1)!')
                return
            R_all = np.vstack((R_all,R1))
            E_all = np.vstack((E_all,E1))
            G_all = np.vstack((G_all,G1))
            print('Evaluated image 1 of the initial dimer.\n')

        # initialize the GP model:
        ker_const = GPy.kern.Bias(input_dim=D)
        ker_const.variance.constrain_fixed()
        utils_atomic.update_active_fro(conf_info,conf_info_inactive,R_all,actdist_fro)
        print('{:g} active and {:g} inactive frozen atoms in the beginning.\n'.format(conf_info['conf_fro'].shape[0],conf_info_inactive['conf_ifro'].shape[0]))
        ker_sexpat = GPy.kern.RBF_atomic(input_dim=D, magnitude=1., lengthscale=np.ones(conf_info['n_pt']), conf_info=conf_info)
        ker = ker_const + ker_sexpat
        kernel_list = [ker]
        for dim in range(0,D):
            kernel_list += [GPy.kern.DiffKern(ker,dim)]
        lik = GPy.likelihoods.Gaussian()
        lik.variance.constrain_fixed(value=1e-8)
        likelihood_list = [lik]*(D+1)
        opt = paramz.optimization.optimization.opt_SCG(max_iters=1000, xtol=1e-4, ftol=1e-4, gtol=1e-4)
        gp_model = GPy.models.MultioutputGP(X_list=[R_all]*(D+1),Y_list=[E_all]+np.hsplit(G_all,D),kernel_list=kernel_list,likelihood_list=likelihood_list,inference_method=GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference())
    
        if num_iter_initrot > 0:
        
            # evaluate image 1 of the dimer if not already done:
            if eval_image1 < 1:
                R1 = R + dimer_sep*orient
                E1, G1 = pot_biased(R1)
                if E1.ndim < 2:
                    print('ERROR: Modify your energy function so that it returns two-dimensional ndarrays (of shape ''N_im'' x 1 and ''N_im'' x ''D''), even if there is only one image in the input (''N_im'' = 1)!')
                    return
                R_all = np.vstack((R_all,R1))
                E_all = np.vstack((E_all,E1))
                G_all = np.vstack((G_all,G1))
                print('Evaluated image 1 of the initial dimer for initial rotations.\n')
        
            if initrot_nogp < 1:        

                # OUTER ITERATION LOOP FOR INITIAL ROTATIONS
                for ind_bigiter_initrot in range(num_iter_initrot+1):

                    # stop initial rotations if converged:
                    F_rot = utils_dimer.force_rot(np.vstack((G_R,G1)),orient,dimer_sep)
                    F_0 = np.sqrt(np.sum(np.square(F_rot)))
                    C_0 = np.dot(-G_R[0,:]+G1[0,:],orient[0,:])/dimer_sep
                    dtheta = 0.5*np.arctan(0.5*F_0/np.abs(C_0))
                    if dtheta < T_anglerot_init:
                        print('Rotated the initial dimer in {:g} outer iterations (total number of image evaluations: {:g}).\n'.format(ind_bigiter_initrot,E_all.shape[0]-N_obs_init))
                        break

                    # stop initial rotations if maximum number of outer iterations reached:
                    if ind_bigiter_initrot == num_iter_initrot:
                        print('WARNING: Tried to rotate the initial dimer, but maximum of {:g} outer iterations reached before convergence (total number of image evaluations: {:g}).\n'.format(ind_bigiter_initrot,E_all.shape[0]-N_obs_init))
                        break
                
                    # optimize the GP hyperparameters and set up the approximated potential function for the inner iteration loop:
                    mean_y = np.mean(E_all)
                    range_y = np.max(E_all)-np.min(E_all)
                    range_x = np.max(utils_atomic.dist_at(R_all,R_all,conf_info,np.ones(conf_info['n_pt'])))
                    gp_model.kern.sum.bias.variance = np.max((1.0,mean_y**2))
                    gp_model.kern.sum.RBF_atomic.magnitude = norm.ppf(0.75,0,(range_y/3))
                    gp_model.kern.sum.RBF_atomic.lengthscale = norm.ppf(0.75,0,range_x/3)*np.ones(conf_info['n_pt'])
                    mag_prior = GPy.priors.Gaussian(mu=0.0,sigma=np.max((1.0,range_y/3)))
                    mag_prior.domain = '_POSITIVE'
                    gp_model.kern.sum.RBF_atomic.magnitude.set_prior(mag_prior)
                    len_prior = GPy.priors.Gaussian(mu=0.0,sigma=np.max((1.0,(range_x/3))))
                    len_prior.domain = '_POSITIVE'
                    gp_model.kern.sum.RBF_atomic.lengthscale.set_prior(len_prior)
                    gp_model.set_XY([R_all]*(D+1),[E_all]+np.hsplit(G_all,D))
                    gp_model.optimize(optimizer=opt)
                    if np.all(param_gp_initrot == None):
                        param_gp_initrot = gp_model[:][np.newaxis]
                    else:
                        param_gp_initrot = np.vstack((param_gp_initrot,gp_model[:]))
                        
                    pot_gp = lambda R : utils_dimer.potential_gp(R,gp_model)

                    # define the convergence threshold for the inner iteration loop:
                    T_anglerot_init_gp = np.min((0.01,T_anglerot_init/10.0))

                    # define the initial dimer orientation for the inner iteration loop:
                    orient_old = orient.copy()
                    orient = orient_init.copy()

                    # calculate approximated energy and gradient at the middle point and image 1 of the dimer:
                    R01 = np.vstack((R,R+dimer_sep*orient))
                    E01, G01 = pot_gp(R01)
            
                    # rotational force of the previous rotation iteration:
                    rotinfo['F_rot_old'] = np.zeros((1,D))
                    # modified rotational force of the previous rotation iteration:
                    rotinfo['F_modrot_old'] = np.zeros((1,D))
                    # unit vector perpendicular to 'orient' within the rotation plane of the previous rotation iteration:
                    rotinfo['orient_rot_oldplane'] = np.zeros((1,D))
                    # number of conjugated rotation iterations:
                    rotinfo['cgiter_rot'] = 0
                    # maximum number of conjugated rotation iterations before resetting the conjugate directions:
                    rotinfo['num_cgiter_rot'] = D
                    # change of orientation in m previous rotation iterations (in L-BFGS):
                    rotinfo['deltaR_mem'] = np.ndarray(shape=(0,D))
                    # change of rotational force in m previous rotation iterations excluding the last one (in L-BFGS):
                    rotinfo['deltaF_mem'] = np.ndarray(shape=(0,D))
                    # maximum number of previous rotation iterations kept in memory (in L-BFGS):
                    rotinfo['num_lbfgsiter_rot'] = D
                    # indicator of early stopping:
                    not_relaxed = 0
             
                    # INNER ITERATION LOOP FOR INITIAL ROTATIONS
                    for ind_iter in range(num_iter+1):
                
                        # stop the inner iteration loop if maximum number of iterations reached:
                        if ind_iter == num_iter:
                            print('WARNING: Maximum number of inner iterations ({:g}) reached during initial rotations.\n'.format(ind_iter))
                            not_relaxed = 1
                            break
                    
                        orient_old_gp = orient.copy()

                        # if necessary, rotate the dimer and re-calculate approximated energy and gradient at image 1:
                        orient, Curv, R_obs, E_obs, G_obs = method_rot(R,orient,G01,pot_gp,dimer_sep,T_anglerot_init_gp,0,rotinfo)
                        if R_obs.shape[0] < 1:
                            break
                        else:
                            R01[1,:] = R+dimer_sep*orient
                            E01[1,0], G01[1,:] = pot_gp(R01[1,:][np.newaxis])
                            if np.arccos(np.dot(orient[0,:],orient_old_gp[0,:])) < T_anglerot_init_gp:
                                break
                    
                    # END OF INNER ITERATION LOOP FOR INITIAL ROTATIONS
            
                    if ind_iter < 1:
                        print('Rotated the initial dimer in {:g} outer iterations (total number of image evaluations: {:g}).\n'.format(ind_bigiter_initrot,E_all.shape[0]-N_obs_init))
                        print('WARNING: Dimer orientation converged on the GP surface (T_anglerot_init_gp = {:g}), but not on the true PES (T_anglerot_init = {:g}).\n'.format(T_anglerot_init_gp,T_anglerot_init))
                        break
                    else:
                        if ind_bigiter_initrot > 0:
                            if np.arccos(np.dot(orient[0,:],orient_old[0,:])) < T_anglerot_init:
                                print('Rotated the initial dimer in {:g} outer iterations (total number of image evaluations: {:g}).\n'.format(ind_bigiter_initrot+1,E_all.shape[0]-N_obs_init))
                                break
                        R1 = R+dimer_sep*orient
                        E1, G1 = pot_biased(R1)
                        R_all = np.vstack((R_all,R1))
                        E_all = np.vstack((E_all,E1))
                        G_all = np.vstack((G_all,G1))
                
                # END OF OUTER ITERATION LOOP FOR INITIAL ROTATIONS
        
            else:
        
                # rotational force of the previous rotation iteration:
                rotinfo['F_rot_old'] = np.zeros((1,D))
                # modified rotational force of the previous rotation iteration:
                rotinfo['F_modrot_old'] = np.zeros((1,D))
                # unit vector perpendicular to 'orient' within the rotation plane of the previous rotation iteration:
                rotinfo['orient_rot_oldplane'] = np.zeros((1,D))
                # number of conjugated rotation iterations:
                rotinfo['cgiter_rot'] = 0
                # maximum number of conjugated rotation iterations before resetting the conjugate directions:
                rotinfo['num_cgiter_rot'] = D
                # change of orientation in m previous rotation iterations (in L-BFGS):
                rotinfo['deltaR_mem'] = np.ndarray(shape=(0,D))
                # change of rotational force in m previous rotation iterations excluding the last one (in L-BFGS): 
                rotinfo['deltaF_mem'] = np.ndarray(shape=(0,D))
                # maximum number of previous rotation iterations kept in memory (in L-BFGS)
                rotinfo['num_lbfgsiter_rot'] = D
            
                # ITERATION LOOP FOR INITIAL ROTATIONS
                for ind_iter_initrot in range(num_iter_initrot+1):
            
                    # stop initial rotations if maximum number of iterations reached:
                    if ind_iter_initrot == num_iter_initrot:
                        print('WARNING: Tried to rotate the initial dimer, but maximum of {:g} iterations reached before convergence (total number of image evaluations: {:g}).\n'.format(ind_iter_initrot,E_all.shape[0]-N_obs_init))
                        break
            
                    # if necessary, rotate the dimer and re-evaluate energy and gradient at image 1:
                    orient, Curv, R_obs, E_obs, G_obs = method_rot(R,orient,np.vstack((G_R,G1)),pot_biased,dimer_sep,T_anglerot_init,0,rotinfo)
                    if R_obs.shape[0] < 1:
                        print('Rotated the initial dimer {:g} times (total number of image evaluations: {:g}).\n'.format(ind_iter_initrot,E_all.shape[0]-N_obs_init))
                        break
                    else:
                        R1 = R+dimer_sep*orient
                        E1, G1 = pot_biased(R1)
                        R_all = np.vstack((R_all,R_obs,R1))
                        E_all = np.vstack((E_all,E_obs,E1))
                        G_all = np.vstack((G_all,G_obs,G1))
            
                # END OF ITERATION LOOP FOR INITIAL ROTATIONS
    
        obs_initrot = E_all.shape[0]-N_obs_init
    
        if inittrans_nogp > 0:
        
            if G1.shape[0] < 1:
                R1 = R + dimer_sep*orient
                E1, G1 = pot_biased(R1)
                if E1.ndim < 2:
                    print('ERROR: Modify your energy function so that it returns two-dimensional ndarrays (of shape ''N_im'' x 1 and ''N_im'' x ''D''), even if there is only one image in the input (''N_im'' = 1)!')
                    return
                R_all = np.vstack((R_all,R1))
                E_all = np.vstack((E_all,E1))
                G_all = np.vstack((G_all,G1))
                print('Evaluated image 1 of the dimer for an initial translation step.\n')
        
            # translate the dimer once with a test step and evaluate energy and gradient at the middle point of the dimer:
            Curv = np.dot(-G_R[0,:]+G1[0,:],orient[0,:])/dimer_sep
            F_trans = -G_R + 2*np.dot(G_R[0,:],orient[0,:])*orient
            R = R + 0.5*F_trans/np.abs(Curv)
            print('Translated the dimer once with a test step, and evaluated the middle point.\n')
            E_R, G_R = pot_biased(R)
            if E_R.ndim < 2:
                print('ERROR: Modify your energy function so that it returns two-dimensional ndarrays (of shape ''N_im'' x 1 and ''N_im'' x ''D''), even if there is only one image in the input (''N_im'' = 1)!')
                return
            R_all = np.vstack((R_all,R))
            E_all = np.vstack((E_all,E_R))
            G_all = np.vstack((G_all,G_R))
            print('Accurate values after an initial translation step: E_R = {:g}, maxF_R = {:g} \n\n'.format(E_R[0,0],np.max(np.abs(G_R))))
        
            # stop the algorithm if final convergence is obtained:
            if np.max(np.abs(G_R)) < T_dimer:
                print('Final convergence obtained after an initial translation step ({:g} image evaluations).\n'.format(E_all.shape[0]-N_obs_init))
                return
        
            # evaluate image 1 of the dimer if option 'eval_image1' is set on:
            if eval_image1 > 0:
                R1 = R + dimer_sep*orient
                E1, G1 = pot_biased(R1)
                R_all = np.vstack((R_all,R1))
                E_all = np.vstack((E_all,E1))
                G_all = np.vstack((G_all,G1))
                print('Evaluated image 1 of the dimer.\n')

            if actdist_fro < np.inf:
                # check if new active frozen atoms and update 'conf_info' and 'conf_info_inactive':
                new_act = utils_atomic.update_active_fro(conf_info,conf_info_inactive,R,actdist_fro)
                # if new active frozen atoms, update the GP model:
                if new_act > 0:
                    print('More frozen atoms activated. Now {:g} active and {:g} inactive frozen atoms.\n'.format(conf_info['conf_fro'].shape[0],conf_info_inactive['conf_ifro'].shape[0]))
                    gp_model.kern.sum.RBF_atomic.conf_info = conf_info
    
        orient_init_gp = orient.copy()
        R_latest_conv = np.ndarray(shape=(0,D))
        orient_latest_conv = np.ndarray(shape=(0,D))
        R_previous = np.ndarray(shape=(0,D))
        orient_previous = np.ndarray(shape=(0,D))
     
        if R_all.shape[0] < 2:
            R1 = R + dimer_sep*orient
            E1, G1 = pot_biased(R1)
            R_all = np.vstack((R_all,R1))
            E_all = np.vstack((E_all,E1))
            G_all = np.vstack((G_all,G1))
            print('Evaluated image 1 of the dimer for the initial GP model.\n')
        
        ind_bigiter_start = 1
    else:
        ##### NOTICE: IMPLEMENT HERE LOADING OF DATA FROM 'load_file' !!!        
        print('ERROR: LOADING DATA FROM FILE NOT IMPLEMENTED!')
        return
        # load(load_file)
        # bigiter_init = bigiter + 1
    
    # OUTER ITERATION LOOP
    for ind_bigiter in range(ind_bigiter_start,num_bigiter+1):
        
        if save_file:
            ##### NOTICE: IMPLEMENT HERE SAVING DATA TO FILE !!!
            print('ERROR: SAVING DATA TO FILE NOT IMPLEMENTED!')
            #save(save_file)     

        # optimize the GP hyperparameters and calculate some variables unchanged during the relaxation:
        mean_y = np.mean(E_all)
        range_y = np.max(E_all)-np.min(E_all)
        range_x = np.max(utils_atomic.dist_at(R_all,R_all,conf_info,np.ones(conf_info['n_pt'])))
        gp_model.kern.sum.bias.variance = np.max((1.0,mean_y**2))
        mag_prior = GPy.priors.Gaussian(mu=0.0,sigma=np.max((1.0,range_y/3)))
        mag_prior.domain = '_POSITIVE'
        gp_model.kern.sum.RBF_atomic.magnitude.set_prior(mag_prior)
        len_prior = GPy.priors.Gaussian(mu=0.0,sigma=np.max((1.0,(range_x/3))))
        len_prior.domain = '_POSITIVE'
        gp_model.kern.sum.RBF_atomic.lengthscale.set_prior(len_prior)
        if ind_bigiter < 2 or ind_bigiter <= num_bigiter_initparam or conf_info['n_pt'] > gp_model.kern.sum.RBF_atomic.lengthscale.shape[0]:
            gp_model.kern.sum.RBF_atomic.magnitude = norm.ppf(0.75,0,range_y/3)
            gp_model.kern.sum.RBF_atomic.lengthscale = norm.ppf(0.75,0,range_x/3)*np.ones(conf_info['n_pt'])
        gp_model.set_XY([R_all]*(D+1),[E_all]+np.hsplit(G_all,D))
        gp_model.optimize(optimizer=opt)                    
        if np.all(param_gp == None):
            param_gp = gp_model[:][np.newaxis]
        else:
            param_gp = np.vstack((param_gp,gp_model[:]))
                    
        pot_gp = lambda R : utils_dimer.potential_gp(R,gp_model)
        
        # define the convergence threshold for the relaxation phase:
        if divisor_T_dimer_gp > 0:
            # if this option is set on, the GP convergence threshold is 1/'divisor_T_dimer_gp'
            # of the smallest accurate 'maxF_R' obtained so far,
            # but not less than 1/10 of the final threshold 'T_dimer':
            T_dimer_gp = np.max((np.min(maxF_R_acc)/divisor_T_dimer_gp,T_dimer/10.0))
        else:
            # otherwise the GP convergence threshold is always 1/10 of the final threshold:
            T_dimer_gp = T_dimer/10.0

        # define the initial dimer for the relaxation phase:
        if islarge_num_iter > 0 or R_previous.shape[0] < 1:
            if ind_bigiter > num_bigiter_initloc and R_latest_conv.shape[0] > 0:
                R = R_latest_conv.copy()
                orient = orient_latest_conv.copy()
                print('Started relaxation phase from the latest converged dimer.\n')
            else:
                R = R_init.copy()
                orient = orient_init_gp.copy()
                print('Started relaxation phase from the initial location.\n')
        else:
            R = R_previous.copy()
            orient = orient_previous.copy()
            print('Started relaxation phase where the previous one stopped.\n')
            R_previous = np.ndarray(shape=(0,D))
            orient_previous = np.ndarray(shape=(0,D))

        # rotational force of the previous rotation iteration:
        rotinfo['F_rot_old'] = np.zeros((1,D))
        # modified rotational force of the previous rotation iteration:
        rotinfo['F_modrot_old'] = np.zeros((1,D))
        # unit vector perpendicular to 'orient' within the rotation plane of the previous rotation iteration:
        rotinfo['orient_rot_oldplane'] = np.zeros((1,D))
        # number of conjugated rotation iterations:
        rotinfo['cgiter_rot'] = 0
        # maximum number of conjugated rotation iterations before resetting the conjugate directions:
        rotinfo['num_cgiter_rot'] = D
        # change of orientation in m previous rotation iterations (in L-BFGS):
        rotinfo['deltaR_mem'] = np.ndarray(shape=(0,D))
        # change of rotational force in m previous rotation iterations excluding the last one (in L-BFGS): 
        rotinfo['deltaF_mem'] = np.ndarray(shape=(0,D))
        # maximum number of previous rotation iterations kept in memory (in L-BFGS):
        rotinfo['num_lbfgsiter_rot'] = D       
        transinfo['potential'] = pot_gp
        # translational force of the previous translation iteration:
        transinfo['F_trans_old'] = np.zeros((1,D))
        # modified translational force of the previous translation iteration:
        transinfo['F_modtrans_old'] = np.zeros((1,D))
        # velocity of the middle point of the dimer in the previous translation iteration:
        transinfo['V_old'] = np.zeros((1,D))
        # indicator if zero velocity used:
        transinfo['zeroV'] = 1
        # number of conjugated translation iterations:
        transinfo['cgiter_trans'] = 0
        # maximum number of conjugated translation iterations before resetting the conjugate directions:
        transinfo['num_cgiter_trans'] = D
        # change of location in m previous translation iterations (in L-BFGS):
        transinfo['deltaR_mem'] = np.ndarray(shape=(0,D))
        # change of translational force in m previous translation iterations excluding the last one (in L-BFGS):
        transinfo['deltaF_mem'] = np.ndarray(shape=(0,D))
        # maximum number of previous translation iterations kept in memory (in L-BFGS):
        transinfo['num_lbfgsiter_trans'] = D
        # indicator of early stopping:
        not_relaxed = 0
        
        # INNER ITERATION LOOP
        for ind_iter in range(num_iter+1):
            
            if np.mod(ind_iter,100) == 0:
                print('Inner iteration {:g}.\n'.format(ind_iter))
            
            # calculate approximated energy and gradient at the middle point and image 1 of the dimer:
            R01 = np.vstack((R,R+dimer_sep*orient))
            E01, G01 = pot_gp(R01)
            
            # stop the relaxation phase if converged:
            E_R_gp = np.hstack((E_R_gp,E01[0,0]))
            maxF_R = np.max(np.abs(G01[0,:]))
            maxF_R_gp = np.hstack((maxF_R_gp,maxF_R))
            if maxF_R < T_dimer_gp:
                R_latest_conv = R.copy()
                orient_latest_conv = orient.copy()
                print('Stopped relaxation phase: converged after {:g} inner iterations.\n'.format(ind_iter))
                break
            
            # stop the relaxation phase if maximum number of inner iterations reached:
            if ind_iter == num_iter:
                if islarge_num_iter <= 0:
                    R_previous = R.copy()
                    orient_previous = orient.copy()
                print('Stopped relaxation phase: maximum number of inner iterations ({:g}) reached.\n'.format(ind_iter))
                not_relaxed = 1
                num_esmax = num_esmax + 1
                break
            
            # if necessary, rotate the dimer and re-calculate approximated energy and gradient at image 1:
            for ind_iter_rot in range(1,num_iter_rot_gp+1):
                orient_old_gp = orient.copy()
                orient, Curv, R_obs, E_obs, G_obs = method_rot(R,orient,G01,pot_gp,dimer_sep,T_anglerot_gp,0,rotinfo)
                if R_obs.shape[0] < 1:
                    break
                else:
                    R01[1,:] = R+dimer_sep*orient
                    E01[1,0], G01[1,:] = pot_gp(R01[1,:][np.newaxis])
                    if np.arccos(np.dot(orient[0,:],orient_old_gp[0,:])) < T_anglerot_gp:
                        break
            rotinfo['deltaR_mem'] = np.ndarray(shape=(0,D))
            rotinfo['deltaF_mem'] = np.ndarray(shape=(0,D))
            
            # translate the dimer:
            Curv = np.dot(-G01[0,:]+G01[1,:],orient[0,:])/dimer_sep
            R_new, R_obs, E_obs, G_obs = method_trans(R,orient,-G01[0,:][np.newaxis],Curv,param_trans,transinfo) 

            if actdist_fro < np.inf:
                # check whether any frozen atoms have become active, and update 'conf_info' and 'conf_info_inactive' accordingly:
                new_act = utils_atomic.update_active_fro(conf_info,conf_info_inactive,np.vstack((R_new,R_obs)),actdist_fro)
                # if new frozen atoms were activated, update the GP model and reoptimize the hyperparameters:
                if new_act > 0:
                    print('More frozen atoms activated. Now {:g} active and {:g} inactive frozen atoms.\n'.format(conf_info['conf_fro'].shape[0],conf_info_inactive['conf_ifro'].shape[0]))
                    gp_model.kern.sum.RBF_atomic.conf_info = conf_info
                    range_x = np.max(utils_atomic.dist_at(R_all,R_all,conf_info,np.ones(conf_info['n_pt'])))
                    len_prior = GPy.priors.Gaussian(mu=0.0,sigma=np.max((1,range_x/3)))
                    len_prior.domain = '_POSITIVE'
                    gp_model.kern.sum.RBF_atomic.lengthscale.set_prior(len_prior)
                    if ind_bigiter+1 <= num_bigiter_initparam or conf_info['n_pt'] > gp_model.kern.sum.RBF_atomic.lengthscale.shape[0]:
                        gp_model.kern.sum.RBF_atomic.lengthscale = norm.ppf(0.75,0,range_x/3)*np.ones(conf_info['n_pt'])
                    gp_model.set_XY([R_all]*(D+1),[E_all]+np.hsplit(G_all,D))
                    gp_model.optimize(optimizer=opt)
                    rotinfo['F_rot_old'] = np.ndarray(shape=(0,D))
                    rotinfo.['F_modrot_old'] = np.ndarray(shape=(0,D))
                    rotinfo.['orient_rot_oldplane'] = np.ndarray(shape=(0,D))
                    rotinfo.['cgiter_rot'] = 0
                    transinfo.['potential'] = pot_gp
                    transinfo.['F_trans_old'] = np.ndarray(shape=(0,D))
                    transinfo.['F_modtrans_old'] = np.ndarray(shape=(0,D))
                    transinfo.['V_old'] = np.ndarray(shape=(0,D))
                    transinfo.['zeroV'] = 1
                    transinfo.['cgiter_trans'] = 0
                    transinfo.['deltaR_mem'] = np.ndarray(shape=(0,D))
                    transinfo.['deltaF_mem'] = np.ndarray(shape=(0,D))

            # limit the move if any atom-wise step length is larger than 99 % of 0.5*(1-'ratio_at_limit') times the minimum inter-atomic distance:
            steplength = np.sqrt(np.sum(np.square(R_new-R)))
            steplength_atomwise = np.sqrt(np.square(R_new[:,0::3]-R[:,0::3])+np.square(R_new[:,1::3]-R[:,1::3])+np.square(R_new[:,2::3]-R[:,2::3])) # atom-wise step lengths
            steplength_atomwise_limit = 0.5*(1-ratio_at_limit)*utils_atomic.mindist_interatomic(R,conf_info)
            if any(steplength_atomwise > 0.99*steplength_atomwise_limit)
                step_coeff = 0.99*np.min(steplength_atomwise_limit/steplength_atomwise)
                print('Warning: the step length of inner iteration {:g} limited.\n'.format(ind_iter))
                R_new = R + step_coeff*(R_new-R)
                transinfo['F_trans_old'] = np.zeros((1,D))
                transinfo['F_modtrans_old'] = np.zeros((1,D))
                transinfo['V_old'] = np.zeros((1,D))
                transinfo['zeroV'] = 1
                transinfo['cgiter_trans'] = 0
                transinfo['deltaR_mem'] = np.ndarray(shape=(0,D))
                transinfo['deltaF_mem'] = np.ndarray(shape=(0,D))

            # STOPPING CRITERION FOR INTER-ATOMIC DISTANCES
            # reject the step and stop the relaxation phase if the following does not hold:
            # there is an observed data point so that all inter-atomic distances of the current image are more than 'ratio_at_limit'
            # (by default 2/3) but less than 1/'ratio_at_limit' (3/2) times the corresponding inter-atomic distance of the observed data point,
            # i.e., |log(r_im/r_nearobs)| < |log(ratio_at_limit)| ( = |log(2/3)| = 0.4055 )
            if ind_iter > 0:
                disp1D_nearest = np.min(utils_atomic.dist_max1Dlog(R_new,R_all,conf_info))
                if disp1D_nearest > np.abs(np.log(ratio_at_limit)):
                    eval_next_i = np.argmax(disp1D_nearest)+1
                    print('Stopped the relaxation phase after {:g} inner iterations: inter-atomic distance changes too much compared to "nearest" observed data point.\n'.format(ind_iter))
                    num_es = num_es + 1
                    break

            #### STOPPING CRITERION FOR JOINT MOVEMENT OF ATOMS (OPTIONAL)
            #### reject the step and stop the relaxation phase if there does not exist
            #### an observed data point that fulfils the following requirement:
            #### for all moving atoms, the change in the position of the atom between the current image and
            #### the observed data point is not more than 1/2 of the distance from the atom to its nearest
            #### neighbour atom in the current image or the observed data point
            ###if ind_iter > 0:
            ###    dispmaxrel_nearest = np.min(utils_atomic.dist_maxrel_atomwise3(R_new,R_all,conf_info))
            ###    if dispmaxrel_nearest > 0.5:
            ###        print('Stopped the relaxation phase after {:g} inner iterations: atom position changes too much compared to "nearest" observed data point.\n'.format(ind_iter))
            ###        num_es2 = num_es2 + 1
            ###        break
            
            # THE OLD STOPPING CRITERION FOR RAW DISPLACEMENT
            # reject the step and stop the relaxation phase if the distance from the middle point of the current dimer to the
            # nearest observed data point is larger than 'disp_max':
            if ind_iter > 0:
                    disp_nearest = np.sqrt(np.min(np.sum(np.square(R_new-R_all),1)))
                if disp_nearest > disp_max:
                    eval_next_i = np.argmax(disp_nearest)+1
                    print('Stopped the relaxation phase after {:g} inner iterations: dimer too far from the nearest observed data point.\n'.format(ind_iter))
                    num_es2 = num_es2 + 1
                    break

            # otherwise accept the step and continue the relaxation:
            R = R_new.copy()
            
        # END OF INNER ITERATION LOOP
        
        # acquire the accurate energy and gradient at the middle point of the dimer and add them to the data:
        E_R, G_R = pot_biased(R)
        if E_R.ndim < 2:
            print('ERROR: Modify your energy function so that it returns two-dimensional ndarrays (of shape ''N_im'' x 1 and ''N_im'' x ''D''), even if there is only one image in the input (''N_im'' = 1)!')
            return
        R_all = np.vstack((R_all,R))
        E_all = np.vstack((E_all,E_R))
        G_all = np.vstack((G_all,G_R))
        
        E_R_acc = np.hstack((E_R_acc,E_R[0,0]))
        maxF_R_acc = np.hstack((maxF_R_acc,np.max(np.abs(G_R))))
        obs_at = np.hstack((obs_at,E_R_gp.shape[0]-1))
        print('Accurate values: E_R = {:g}, maxF_R = {:g} \n\n'.format(E_R_acc[-1],maxF_R_acc[-1]))

        # stop the algorithm if final convergence is obtained:
        if maxF_R_acc[-1] < T_dimer:
            print('Final convergence obtained after {:g} relaxation phases (total number of image evaluations: {:g}).\n'.format(ind_bigiter,E_all.shape[0]-N_obs_init))
            break

        # stop the algorithm if maximum number of outer iterations is reached:
        if ind_bigiter == num_bigiter:
            print('Stopped the algorithm: Maximum number of outer iterations ({:g}) reached.\n'.format(i_eval+1))
            break
    
        # evaluate image 1 of the dimer if option 'eval_image1' is set on:
        if eval_image1 > 0:
            R1 = R + dimer_sep*orient
            E1, G1 = pot_biased(R1)
            R_all = np.vstack((R_all,R1))
            E_all = np.vstack((E_all,E1))
            G_all = np.vstack((G_all,G1))
        
    # END OF OUTER ITERATION LOOP
    
    obs_total = E_all.shape[0]-N_obs_init
        
    return R, orient, E_R, G_R, gp_model, R_all, E_all, G_all, obs_at, E_R_acc, E_R_gp, maxF_R_acc, maxF_R_gp, param_gp_initrot, param_gp, obs_initrot, obs_total, num_esmax, num_es1, num_es2
