%%%%% atomic_GP_dimer.m
%%%%% Copyright: Olli-Pekka Koistinen, Aalto University, 9.7.2020
%%%%%
%%%%% This function uses the atomic GP-dimer method to converge to a saddle point,
%%%%% starting from somewhere inside the convergence area.
%%%%%
%%%%% The relaxation of the dimer on the approximated energy surface
%%%%% is done according to a dimer method, where a rotation step rotates
%%%%% the dimer (a pair of images) towards its minimum energy orientation
%%%%% to find the lowest curvature mode of the potential energy and
%%%%% translation step moves the dimer towards the saddle point by
%%%%% inverting the force component in the direction of the dimer.
%%%%% After each relaxation phase, the energy and gradient are acquired at
%%%%% the middle point of the dimer, and the GP hyperparameters are reoptimized.
%%%%%
%%%%% The atomic version of GP-dimer uses a special GPstuff covariance function 'gpcf_sexpat.m'
%%%%% (not included in the GPstuff installation) where the distance between configurations C and C'
%%%%% is based on the changes of the inter-atomic distances. The distance function is implemented
%%%%% in 'dist_at.m', which is needed when defining the prior distributions for the lengthscales
%%%%% during the algorithm. When defining the stopping criteria for the path relaxation phases,
%%%%% also another distance function 'dist_max1Dlog.m' is needed. One more auxiliary function required
%%%%% is 'mindist_interatomic.m', which gives the minimum inter-atomic distance in a configuration.
%%%%% To update the set of active frozen atoms, also an auxiliary function 'update_active_fro.m' is needed.
%%%%% 
%%%%% Using the function requires that GPstuff is installed and added to the path in Matlab.
%%%%%
%%%%% Input:
%%%%%   pot_general            accurate potential and gradient function
%%%%%   conf_info              structure array including information about the configurations necessary for the GP model
%%%%%                           - conf_info.conf_fro: coordinates of active frozen atoms (N_fro x 3)
%%%%%                           - conf_info.atomtype_mov: atomtype indices for moving atoms (1 x N_mov)
%%%%%                           - conf_info.atomtype_fro: atomtype indices for active frozen atoms (1 x N_fro)
%%%%%                           - conf_info.pairtype: pairtype indices for pairs of atomtypes (n_at x n_at)
%%%%%                           - conf_info.n_pt: number of active pairtypes
%%%%%   conf_info_inactive     structure array including information about inactive frozen atoms
%%%%%                           - conf_info_inactive.conf_ifro: coordinates of inactive frozen atoms (N_ifro x 3)
%%%%%                           - conf_info_inactive.atomtype_ifro: atomtype indices for inactive frozen atoms (1 x N_ifro)
%%%%%   actdist_fro            activation distance for moving+frozen atom pairs (inf if all active)
%%%%%   R_all_init             coordinates of the initial data points (N_obs x D)
%%%%%   E_all_init             energies at the initial data points (N_obs x 1)
%%%%%   G_all_init             gradients at the initial data points (N_obs x D)
%%%%%   R_init                 coordinates of the middle point of the initial dimer (1 x D)
%%%%%   E_init                 energy at the middle point of the initial dimer (if not empty, 'R_init' should be included in 'R_all_init')
%%%%%   G_init                 gradient at the middle point of the initial dimer (1 x D) (if not empty, 'R_init' should be included in 'R_all_init')
%%%%%   orient_init            unit vector along the direction of the initial dimer (1 x D)
%%%%%   dimer_sep              dimer separation (distance from the middle point of the dimer to the two images)
%%%%%   method_rot             a function defining the rotation step
%%%%%   method_trans           a function defining the translation step
%%%%%   param_trans            parameters of the translation method (shape depends on 'method_trans')
%%%%%   eval_image1            indicator if image 1 of the dimer is evaluated (1) or not (0) after each relaxation phase
%%%%%                            in addition to the middle point of the dimer
%%%%%   T_dimer                final convergence threshold for 'maxF_R', which is the maximum component of the force acting on the middle point of the dimer
%%%%%                            (i.e., the algorithm is stopped when all components of the accurate force are below 'T_dimer')
%%%%%   initrot_nogp           indicator if the initial rotations are performed without GP (1) or with GP (0)
%%%%%   T_anglerot_init        convergence threshold for rotation angle in the initial rotations
%%%%%                            (the dimer is not rotated when the estimated rotation angle is less than this)
%%%%%   num_iter_initrot       maximum number of initial rotations (0 if initial rotations skipped)
%%%%%   inittrans_nogp         indicator if an initial test translation step is taken without GP (1)
%%%%%                            or if GP is used right after initial rotations (0)
%%%%%   T_anglerot_gp          convergence threshold for rotation angle during a relaxation phase
%%%%%   num_iter_rot_gp        maximum number of rotation iterations per translation during a relaxation phase
%%%%%   divisor_T_dimer_gp     if this option is set on (> 0), the convergence threshold for a relaxation phase is 1/'divisor_T_dimer_gp'
%%%%%                            of the smallest accurate 'maxF_R' obtained so far, but not less than 1/10 of 'T_dimer'
%%%%%                            (otherwise the GP convergence threshold is always 1/10 of 'T_dimer')
%%%%%   disp_max               maximum displacement of the middle point of the dimer from the nearest observed data point
%%%%%                            (the relaxation phase is stopped if 'disp_max' is reached)
%%%%%   ratio_at_limit         limit for the ratio (< 1) of inter-atomic distances between image and its "nearest" observed data point
%%%%%                            (the relaxation phase is stopped if 'ratio_at_limit' is reached for any image)
%%%%%   num_bigiter_initloc    number of outer iterations started from the initial location 'R_init'
%%%%%                            (after that, each relaxation phase is started from the latest converged dimer)
%%%%%   num_bigiter_initparam  number of outer iterations where the hyperparameter optimization is started
%%%%%                            from values initialized based on the range of the current data
%%%%%                            (after that, the optimization is started from the values of the previous round)
%%%%%   num_bigiter            maximum number of outer iterations (new pairs of observations)
%%%%%   num_iter               maximum number of inner iterations (steps during a relaxation phase)
%%%%%   islarge_num_iter       indicator if 'num_iter' is assumed to be much larger than required for dimer convergence on accurate energy surface
%%%%%                            (if not, the next relaxation phase is continued from the current path if 'num_iter' is reached)
%%%%%   load_file              path to the data file required to continue from a cancelled run (empty if started normally from the beginning)
%%%%%   save_file              path to the data file where data is saved (empty if not saved)
%%%%%
%%%%% Output:
%%%%%   R                      coordinates of the middle point of the final dimer (1 x D)
%%%%%   orient                 unit vector along the direction of the final dimer (1 x D)
%%%%%   E_R                    energy at the middle point of the final dimer
%%%%%   G_R                    gradient at the middle point of the final dimer (1 x D)
%%%%%   gp                     the final GP model
%%%%%   R_all                  coordinates of all observation points (N_obs x D)
%%%%%   E_all                  energies for all observation points (N_obs x 1)
%%%%%   G_all                  gradients for all observation points (N_obs x D)
%%%%%   obs_at                 total numbers of inner iterations before new observations were taken
%%%%%   E_R_acc                accurate energy of the middle point of the dimer for each outer iteration
%%%%%   E_R_gp                 approximated energy of the middle point of the dimer for each inner iteration
%%%%%   maxF_R_acc             accurate maximum component of force acting on the middle point of the dimer for each outer iteration
%%%%%   maxF_R_gp              approximated maximum component of force acting on the middle point of the dimer for each inner iteration
%%%%%   param_gp_initrot       optimized GP hyperparameters for each outer iteration during initial rotations
%%%%%   param_gp               optimized GP hyperparameters for each outer iteration
%%%%%   obs_initrot            number of observations required for initial rotations
%%%%%   obs_total              number of total observations
%%%%%   num_esmax              number of outer iterations stopped by maximum inner iteration limit
%%%%%   num_es1                number of outer iterations stopped by inter-atomic stopping criterion
%%%%%   num_es2                number of outer iterations stopped by raw distance stopping criterion


function [R,orient,E_R,G_R,gp,R_all,E_all,G_all,obs_at,E_R_acc,E_R_gp,maxF_R_acc,maxF_R_gp,param_gp_initrot,param_gp,obs_initrot,obs_total,num_esmax,num_es1,num_es2] = ...
             atomic_GP_dimer(pot_general,conf_info,conf_info_inactive,actdist_fro,R_all_init,E_all_init,G_all_init,R_init,E_init,G_init,orient_init,dimer_sep,method_rot,method_trans, ...
             param_trans,eval_image1,T_dimer,initrot_nogp,T_anglerot_init,num_iter_initrot,inittrans_nogp,T_anglerot_gp,num_iter_rot_gp,divisor_T_dimer_gp, ...
             disp_max,ratio_at_limit,num_bigiter_initloc,num_bigiter_initparam,num_bigiter,num_iter,islarge_num_iter,load_file,save_file)
         
    if isempty(load_file)
         
        %%%     
        %%% THIS INFORMATION IS ASSUMED TO BE KNOWN BEFORE BEGINNING
        %%%
    
        N_obs_init = size(E_all_init,1);
        D = size(R_init,2); % dimension of the space
        if isempty(orient_init) % if 'orient_init' is empty, draw random unit vector
            orient_init = normrnd(zeros(1,D),ones(1,D));
        end
        orient_init = orient_init/sqrt(sum(orient_init.^2,2));
        obs_initrot = 0;
        obs_total = 0;
        num_esmax = 0;
        num_es1 = 0;
        num_es2 = 0;
    
        %%%
        %%% THE ALGORITHM BEGINS HERE
        %%%

        R = R_init; % coordinates of the middle point of the dimer
        orient = orient_init; % unit vector along the direction of the dimer
        if isempty(E_init)    
            [E_R,G_R] = pot_general(R); % energy and gradient at the middle point of the dimer
            Elevel = E_R; % set zero level of biased potential to the energy of the middle point of the initial dimer
            E_R = E_R - Elevel;
            R_all = [R_all_init;R]; % coordinates of all observation points
            E_all = [E_all_init-Elevel,E_R]; % energy for all observation points
            G_all = [G_all_init;G_R]; % gradient for all observation points
            pot_biased = @(R) subtract_Elevel(pot_general,R,Elevel); % define biased potential with zero level at 'Elevel'
            fprintf('Evaluated the middle point of the initial dimer.\n');
        else
            Elevel = E_init; % set zero level of biased potential to the energy of the middle point of the initial dimer
            pot_biased = @(R) subtract_Elevel(pot_general,R,Elevel); % define biased potential with zero level at 'Elevel'
            E_R = E_init - Elevel;
            G_R = G_init;
            R_all = R_all_init; % coordinates of all observation points
            E_all = E_all_init-Elevel; % energy for all observation points
            G_all = G_all_init; % gradient for all observation points
        end
        R1 = [];
        E1 = [];
        G1 = [];

        E_R_acc = E_R; % vector gathering accurate energy of the middle point of the dimer for each outer iteration
        maxF_R_acc = max(abs(G_R)); % vector gathering accurate maximum component of the force acting on the middle point of the dimer for each outer iteration 
        obs_at = 0; % vector gathering the total numbers of inner iterations before new observations were taken
        fprintf('Accurate values in the beginning: E_R = %1.3g, maxF_R = %1.3g \n\n',E_R_acc(:,end),maxF_R_acc(:,end));
    
        gp = [];
        param_gp_initrot = []; % optimized GP hyperparameters for each outer iteration during initial rotations
        E_R_gp = []; % vector gathering approximated energy of the middle point of the dimer for each inner iteration
        maxF_R_gp = []; % vector gathering approximated maximum component of the force acting on the middle point of the dimer for each inner iteration
        param_gp = []; % optimized GP hyperparameters for each relaxation phase

        % stop the algorithm if final convergence is obtained
        if maxF_R_acc(:,end) < T_dimer
            fprintf('Final convergence obtained in the beginning (%g image evaluations).\n', size(E_all,1)-N_obs_init);
            return;
        end
    
        % evaluate image 1 of the dimer if option 'eval_image1' is set on
        if eval_image1 > 0
            R1 = R + dimer_sep*orient;
            [E1,G1] = pot_biased(R1);
            R_all = [R_all;R1];
            E_all = [E_all;E1];
            G_all = [G_all;G1];
            fprintf('Evaluated image 1 of the initial dimer.\n');
        end

        % initialize the GP model
        cfc = gpcf_constant('constSigma2_prior',prior_fixed);
        [conf_info,conf_info_inactive,~] = update_active_fro(conf_info,conf_info_inactive,R_all,actdist_fro);
        fprintf('%g active and %g inactive frozen atoms in the beginning.\n', size(conf_info.conf_fro,1), size(conf_info_inactive.conf_ifro,1));
        cfat = gpcf_sexpat('magnSigma2_prior', prior_sqrtt('nu',20), 'lengthScale_prior', prior_gaussian(), 'conf_info', conf_info);
        lik = lik_gaussian('sigma2', 1e-8, 'sigma2_prior', prior_fixed);
        gp = gp_set('cf',{cfc,cfat}, 'lik', lik, 'deriv', D+1, 'jitterSigma2', 0);
        opt = optimset('TolFun', 1e-4, 'TolX', 1e-4, 'display', 'off');
        optimf = @fminscg;
    
        if num_iter_initrot > 0
        
            % evaluate image 1 of the dimer if not already done
            if eval_image1 < 1
                R1 = R + dimer_sep*orient;
                [E1,G1] = pot_biased(R1);
                R_all = [R_all;R1];
                E_all = [E_all;E1];
                G_all = [G_all;G1];
                fprintf('Evaluated image 1 of the initial dimer for initial rotations.\n');
            end
        
            if initrot_nogp < 1        

                for bigiter_initrot = 0:num_iter_initrot % OUTER ITERATION LOOP FOR INITIAL ROTATIONS

                    % stop initial rotations if converged
                    F_rot = force_rot([G_R;G1],orient,dimer_sep);
	                F_0 = sqrt(sum(F_rot.^2,2));
                    C_0 = (-G_R+G1)*orient'/dimer_sep;
                    dtheta = 0.5*atan(0.5*F_0/abs(C_0));
                    if dtheta < T_anglerot_init
                        fprintf('Rotated the initial dimer in %g outer iterations (total number of image evaluations: %g).\n', bigiter_initrot, size(E_all,1)-N_obs_init);
                        break;
                    end

                    % stop initial rotations if maximum number of outer iterations reached
                    if bigiter_initrot == num_iter_initrot
                        fprintf('WARNING: Tried to rotate the initial dimer, but maximum of %g outer iterations reached before convergence (total number of image evaluations: %g).\n', bigiter_initrot, size(E_all,1)-N_obs_init);
                        break;
                    end
                
                    % optimize the GP hyperparameters and calculate some variables unchanged during the inner iteration loop
                    mean_y = mean(E_all);
                    range_y = max(E_all)-min(E_all);
                    range_x = max(max(dist_at(R_all,R_all,conf_info,1)));
                    gp.cf{2}.magnSigma2 = norminv(0.75,0,range_y/3)^2;
                    gp.cf{2}.lengthScale = repmat(norminv(0.75,0,range_x/3),1,conf_info.n_pt);
                    gp.cf{1}.constSigma2 = max(1,mean_y^2);
                    gp.cf{2}.p.magnSigma2.s2 = max(1,(range_y/3)^2);
                    gp.cf{2}.p.lengthScale.s2 = max(1,(range_x/3)^2);
                    R_all2 = [repmat(R_all,D+1,1),reshape(repmat(0:D,size(R_all,1),1),[],1)];
                    gp = gp_optim(gp,R_all2,[E_all;G_all(:)],'opt',opt,'optimf',optimf);
                    param_gp_initrot = [param_gp_initrot;exp(gp_pak(gp))]
                    [~, C] = gp_trcov(gp, R_all2);
                    L = chol(C,'lower');
                    a = L'\(L\[E_all;G_all(:)]);
                    pot_gp = @(R) potential_gp(R,gp,R_all2,a);
            
                    % define the convergence threshold for the inner iteration loop
                    T_anglerot_init_gp = min(0.01,T_anglerot_init/10);

                    % define the initial dimer orientation for the inner iteration loop
                    orient_old = orient;
                    orient = orient_init;
            
                    %% in case of 2D space, plot the approximated energy surface and initial dimer
                    %if D == 2
                    %    plotscale = 0.2;
                    %    plotdensity = plotscale/100;
                    %    [X1,X2] = meshgrid(R_init(1,1)-plotscale:plotdensity:R_init(1,1)+plotscale,R_init(1,2)-plotscale:plotdensity:R_init(1,2)+plotscale);
                    %    figure()
                    %    Ef = pot_gp([X1(:),X2(:)]);
                    %    pcolor(X1,X2,reshape(Ef,size(X1,1),size(X1,2))),shading flat;
                    %    colorbar;
                    %    hold on;
                    %    axis equal tight;             
                    %    plot(R_all(:,1),R_all(:,2),'r+')
                    %    plot([R(1,1)-dimer_sep*orient(1,1);R(1,1)+dimer_sep*orient(1,1)],[R(1,2)-dimer_sep*orient(1,2);R(1,2)+dimer_sep*orient(1,2)],'r-')
                    %end

                    % calculate approximated energy and gradient at the middle point and image 1 of the dimer
                    R01 = [R;R+dimer_sep*orient];
                    [E01,G01] = pot_gp(R01);
            
                    rotinfo.F_rot_old = 0; % rotational force of the previous rotation iteration
                    rotinfo.F_modrot_old = 0; % modified rotational force of the previous rotation iteration
                    rotinfo.orient_rot_oldplane = 0; % unit vector perpendicular to 'orient' within the rotation plane of the previous rotation iteration
                    rotinfo.cgiter_rot = 0; % number of conjugated rotation iterations
                    rotinfo.num_cgiter_rot = D; % maximum number of conjugated rotation iterations before resetting the conjugate directions
                    rotinfo.deltaR_mem = []; % change of orientation in m previous rotation iterations (in L-BFGS)
                    rotinfo.deltaF_mem = []; % change of rotational force in m previous rotation iterations excluding the last one (in L-BFGS)
                    rotinfo.num_lbfgsiter_rot = D; % maximum number of previous rotation iterations kept in memory (in L-BFGS)
                    not_relaxed = 0; % indicator of early stopping
            
                    for iter = 0:num_iter % INNER ITERATION LOOP FOR INITIAL ROTATIONS
                
                        % stop the inner iteration loop if maximum number of iterations reached
                        if iter == num_iter
                            fprintf('WARNING: Maximum number of inner iterations (%g) reached during initial rotations.\n', iter)
                            not_relaxed = 1;
                            break;
                        end
                    
                        orient_old_gp = orient;

                        % if necessary, rotate the dimer and re-calculate approximated energy and gradient at image 1
                        [orient,~,R_obs,~,~,rotinfo] = method_rot(R,orient,G01,pot_gp,dimer_sep,T_anglerot_init_gp,0,rotinfo);
                        if isempty(R_obs)
                            break;
                        else
                            R01(2,:) = R+dimer_sep*orient;
                            [E01(2,1),G01(2,:)] = pot_gp(R01(2,:));
                            if acos(orient*orient_old_gp') < T_anglerot_init_gp
                                break;
                            end
                        end
                
                    end % END OF INNER ITERATION LOOP FOR INITIAL ROTATIONS
            
                    if iter < 1
                        fprintf('Rotated the initial dimer in %g outer iterations (total number of image evaluations: %g).\n', bigiter_initrot, size(E_all,1)-N_obs_init);
                        fprintf('WARNING: Dimer orientation converged on the GP surface (T_anglerot_init_gp = %g), but not on the true PES (T_anglerot_init = %g).\n', T_anglerot_init_gp, T_anglerot_init);
                        break;
                    else
                        if bigiter_initrot > 0
                            if acos(orient*orient_old') < T_anglerot_init
                                fprintf('Rotated the initial dimer in %g outer iterations (total number of image evaluations: %g).\n', bigiter_initrot+1, size(E_all,1)-N_obs_init);
                                break;
                            end
                        end
                        R1 = R+dimer_sep*orient;
                        [E1,G1] = pot_biased(R1);
                        R_all = [R_all;R1];
                        E_all = [E_all;E1];
                        G_all = [G_all;G1];
                    end

                    %% in case of 2D space, plot the relaxed dimer
                    %if D == 2
                    %    plot([R(1,1)-dimer_sep*orient(1,1);R(1,1)+dimer_sep*orient(1,1)],[R(1,2)-dimer_sep*orient(1,2);R(1,2)+dimer_sep*orient(1,2)],'y-') 
                    %    if not_relaxed > 0
                    %        title(['Approximated energy surface on initial rotation round ',num2str(bigiter_initrot+1),', dimer rotation stopped early']);
                    %    else
                    %        title(['Approximated energy surface on initial rotation round ',num2str(bigiter_initrot+1),', rotated dimer']);
                    %    end
                    %end
                
                end % END OF OUTER ITERATION LOOP FOR INITIAL ROTATIONS
        
            else
        
                rotinfo.F_rot_old = 0; % rotational force of the previous rotation iteration
                rotinfo.F_modrot_old = 0; % modified rotational force of the previous rotation iteration
                rotinfo.orient_rot_oldplane = 0; % unit vector perpendicular to 'orient' within the rotation plane of the previous rotation iteration
                rotinfo.cgiter_rot = 0; % number of conjugated rotation iterations
                rotinfo.num_cgiter_rot = D; % maximum number of conjugated rotation iterations before resetting the conjugate directions
                rotinfo.deltaR_mem = []; % change of orientation in m previous rotation iterations (in L-BFGS)
                rotinfo.deltaF_mem = []; % change of rotational force in m previous rotation iterations excluding the last one (in L-BFGS)
                rotinfo.num_lbfgsiter_rot = D; % maximum number of previous rotation iterations kept in memory (in L-BFGS)
            
                for iter_initrot = 0:num_iter_initrot % ITERATION LOOP FOR INITIAL ROTATIONS
            
                    % stop initial rotations if maximum number of iterations reached
                    if iter_initrot == num_iter_initrot
                        fprintf('WARNING: Tried to rotate the initial dimer, but maximum of %g iterations reached before convergence (total number of image evaluations: %g).\n', iter_initrot, size(E_all,1)-N_obs_init);
                        break;
                    end
            
                    % if necessary, rotate the dimer and re-evaluate energy and gradient at image 1
                    [orient,~,R_obs,E_obs,G_obs,rotinfo] = method_rot(R,orient,[G_R;G1],pot_biased,dimer_sep,T_anglerot_init,0,rotinfo);
                    if isempty(R_obs)
                        fprintf('Rotated the initial dimer %g times (total number of image evaluations: %g).\n', iter_initrot, size(E_all,1)-N_obs_init);
                        break;
                    else
                        R1 = R+dimer_sep*orient;
                        [E1,G1] = pot_biased(R1);
                        R_all = [R_all;R_obs;R1];
                        E_all = [E_all;E_obs;E1];
                        G_all = [G_all;G_obs;G1];
                    end
            
                end % END OF ITERATION LOOP FOR INITIAL ROTATIONS
        
            end
        
        end
    
        obs_initrot = size(E_all,1)-N_obs_init;
    
        if inittrans_nogp > 0
        
            if isempty(G1)
                R1 = R + dimer_sep*orient;
                [E1,G1] = pot_biased(R1);
                R_all = [R_all;R1];
                E_all = [E_all;E1];
                G_all = [G_all;G1];
                fprintf('Evaluated image 1 of the dimer for an initial translation step.\n');
            end
        
            % translate the dimer once with a test step and evaluate energy and gradient at the middle point of the dimer
            Curv = (-G_R+G1)*orient'/dimer_sep
            F_trans = -G_R + 2*(G_R*orient')*orient;
            R = R + 0.5*F_trans/abs(Curv);
            fprintf('Translated the dimer once with a test step, and evaluated the middle point.\n');
            [E_R,G_R] = pot_biased(R);
            R_all = [R_all;R];
            E_all = [E_all;E_R];
            G_all = [G_all;G_R];
            fprintf('Accurate values after an initial translation step: E_R = %1.3g, maxF_R = %1.3g \n\n',E_R,max(abs(G_R)));
        
            % stop the algorithm if final convergence is obtained
            if max(abs(G_R)) < T_dimer
                fprintf('Final convergence obtained after an initial translation step (%g image evaluations).\n', size(E_all,1)-N_obs_init);
                return;
            end
        
            % evaluate image 1 of the dimer if option 'eval_image1' is set on
            if eval_image1 > 0
                R1 = R + dimer_sep*orient;
                [E1,G1] = pot_biased(R1);
                R_all = [R_all;R1];
                E_all = [E_all;E1];
                G_all = [G_all;G1];
                fprintf('Evaluated image 1 of the dimer.\n');
            end
        
            if actdist_fro < inf
                % check if new active frozen atoms and update 'conf_info' and 'conf_info_inactive'
                [conf_info,conf_info_inactive,new_act] = update_active_fro(conf_info,conf_info_inactive,R,actdist_fro);
                % if new active frozen atoms, update the GP model
                if new_act > 0
                    fprintf('More frozen atoms activated. Now %g active and %g inactive frozen atoms.\n', size(conf_info.conf_fro,1), size(conf_info_inactive.conf_ifro,1));
                    gp.cf{2}.conf_info = conf_info;
                end
            end
    
        end
        
        orient_init_gp = orient;
        R_latest_conv = [];
        orient_latest_conv = [];
        R_previous = [];
        orient_previous = [];
    
        if size(R_all,1) < 2
            R1 = R + dimer_sep*orient;
            [E1,G1] = pot_biased(R1);
            R_all = [R_all;R1];
            E_all = [E_all;E1];
            G_all = [G_all;G1];
            fprintf('Evaluated image 1 of the dimer for the initial GP model.\n');
        end
    
        bigiter_start = 1;
    else
        load(load_file);
        bigiter_start = bigiter;
    end
    
    for bigiter = bigiter_start:num_bigiter % OUTER ITERATION LOOP
        
        if ~isempty(save_file)
            save(save_file);
        end    
        
        % optimize the GP hyperparameters and calculate some variables unchanged during the relaxation
        mean_y = mean(E_all);
        range_y = max(E_all)-min(E_all);
        range_x = max(max(dist_at(R_all,R_all,conf_info,1)));
        gp.cf{1}.constSigma2 = max(1,mean_y^2);
        gp.cf{2}.p.magnSigma2.s2 = max(1,(range_y/3)^2);
        gp.cf{2}.p.lengthScale.s2 = max(1,(range_x/3)^2);
        if bigiter < 2 || bigiter <= num_bigiter_initparam || conf_info.n_pt > size(gp.cf{2}.lengthScale,2)
            gp.cf{2}.magnSigma2 = norminv(0.75,0,range_y/3)^2;
            gp.cf{2}.lengthScale = repmat(norminv(0.75,0,range_x/3),1,conf_info.n_pt);
        end
        R_all2 = [repmat(R_all,D+1,1),reshape(repmat(0:D,size(R_all,1),1),[],1)];
        gp = gp_optim(gp,R_all2,[E_all;G_all(:)],'opt',opt,'optimf',optimf);
        param_gp = [param_gp;exp(gp_pak(gp))]
        [~, C] = gp_trcov(gp, R_all2);
        L = chol(C,'lower');
        a = L'\(L\[E_all;G_all(:)]);
        pot_gp = @(R) potential_gp(R,gp,R_all2,a);
        
        % define the convergence threshold for the relaxation phase
        if divisor_T_dimer_gp > 0
            % if this option is set on, the GP convergence threshold is 1/'divisor_T_dimer_gp' of the smallest accurate 'maxF_R' obtained so far,
            % but not less than 1/10 of the final threshold
            T_dimer_gp = max([min(maxF_R_acc)/divisor_T_dimer_gp,T_dimer/10]);
        else
            % otherwise the GP convergence threshold is always 1/10 of the final threshold
            T_dimer_gp = T_dimer/10;
        end
        
        % define the initial dimer for the relaxation phase
        if islarge_num_iter > 0 || isempty(R_previous)
            if bigiter > num_bigiter_initloc && ~isempty(R_latest_conv)
                R = R_latest_conv;
                orient = orient_latest_conv;
                fprintf('Started relaxation phase from the latest converged dimer.\n');
            else
                R = R_init;
                orient = orient_init_gp;
                fprintf('Started relaxation phase from the initial location.\n');
            end
        else
            R = R_previous;
            orient = orient_previous;
            fprintf('Started relaxation phase where the previous one stopped.\n');
            R_previous = [];
            orient_previous = [];
        end
        
        %% in case of 2D space, plot the approximated energy surface and initial dimer
        %if D == 2
        %    plotscale = 0.2;
        %    plotdensity = plotscale/100;
        %    [X1,X2] = meshgrid(R(1,1)-plotscale:plotdensity:R(1,1)+plotscale,R(1,2)-plotscale:plotdensity:R(1,2)+plotscale);
        %    figure()
        %    Ef = pot_gp([X1(:),X2(:),zeros(size(X1(:)))]);
        %    pcolor(X1,X2,reshape(Ef,size(X1,1),size(X1,2))),shading flat;
        %    colorbar;
        %    hold on;
        %    axis equal tight;             
        %    plot(R_all(:,1),R_all(:,2),'r+')
        %    plot([R(1,1)-dimer_sep*orient(1,1);R(1,1)+dimer_sep*orient(1,1)],[R(1,2)-dimer_sep*orient(1,2);R(1,2)+dimer_sep*orient(1,2)],'r-')
        %end
        
        rotinfo.F_rot_old = 0; % rotational force of the previous rotation iteration
        rotinfo.F_modrot_old = 0; % modified rotational force of the previous rotation iteration
        rotinfo.orient_rot_oldplane = 0; % unit vector perpendicular to 'orient' within the rotation plane of the previous rotation iteration
        rotinfo.cgiter_rot = 0; % number of conjugated rotation iterations
        rotinfo.num_cgiter_rot = D; % maximum number of conjugated rotation iterations before resetting the conjugate directions
        rotinfo.deltaR_mem = []; % change of orientation in m previous rotation iterations (in L-BFGS)
        rotinfo.deltaF_mem = []; % change of rotational force in m previous rotation iterations excluding the last one (in L-BFGS)
        rotinfo.num_lbfgsiter_rot = D; % maximum number of previous rotation iterations kept in memory (in L-BFGS)
        transinfo.potential = pot_gp;
        transinfo.F_trans_old = 0; % translational force of the previous translation iteration
        transinfo.F_modtrans_old = 0; % modified translational force of the previous translation iteration
        transinfo.V_old = 0; % velocity of the middle point of the dimer in the previous translation iteration
        transinfo.zeroV = 1; % indicator if zero velocity used
        transinfo.cgiter_trans = 0; % number of conjugated translation iterations
        transinfo.num_cgiter_trans = D; % maximum number of conjugated translation iterations before resetting the conjugate directions
        transinfo.deltaR_mem = []; % change of location in m previous translation iterations (in L-BFGS)
        transinfo.deltaF_mem = []; % change of translational force in m previous translation iterations excluding the last one (in L-BFGS)
        transinfo.num_lbfgsiter_trans = D; % maximum number of previous translation iterations kept in memory (in L-BFGS)       
        not_relaxed = 0; % indicator of early stopping
        
        for iter = 0:num_iter % INNER ITERATION LOOP
            
            if mod(iter,100) == 0
               fprintf('Inner iteration %g.\n',iter);
            end
            
            % calculate approximated energy and gradient at the middle point and image 1 of the dimer
            R01 = [R;R+dimer_sep*orient];
            [E01,G01] = pot_gp(R01);
            
            % stop the relaxation phase if converged
            E_R_gp = [E_R_gp,E01(1,1)];
            maxF_R = max(abs(G01(1,:)));
            maxF_R_gp = [maxF_R_gp,maxF_R];
            if maxF_R < T_dimer_gp
                R_latest_conv = R;
                orient_latest_conv = orient;
                fprintf('Stopped relaxation phase: converged after %g inner iterations.\n', iter);
                break;
            end
            
            % stop the relaxation phase if maximum number of inner iterations reached
            if iter == num_iter
                if islarge_num_iter <= 0
                    R_previous = R;
                    orient_previous = orient;
                end
                fprintf('Stopped relaxation phase: maximum number of inner iterations (%g) reached.\n', iter)
                not_relaxed = 1;
                num_esmax = num_esmax + 1;
                break;
            end
            
            % if necessary, rotate the dimer and re-calculate approximated energy and gradient at image 1
            for iter_rot = 1:num_iter_rot_gp
                orient_old_gp = orient;
                [orient,~,R_obs,~,~,rotinfo] = method_rot(R,orient,G01,pot_gp,dimer_sep,T_anglerot_gp,0,rotinfo);
                if isempty(R_obs)
                    break;
                else
                    R01(2,:) = R+dimer_sep*orient;
                    [E01(2,1),G01(2,:)] = pot_gp(R01(2,:));
                    if acos(orient*orient_old_gp') < T_anglerot_gp
                        break;
                    end
                end
            end
            rotinfo.deltaR_mem = [];
            rotinfo.deltaF_mem = [];
            
            % translate the dimer
            Curv = (-G01(1,:)+G01(2,:))*orient'/dimer_sep;
            [R_new,~,~,~,transinfo] = method_trans(R,orient,-G01(1,:),Curv,param_trans,transinfo);          

            if actdist_fro < inf
                % check whether any new frozen atoms have become active, and update 'conf_info' and 'conf_info_inactive'
                [conf_info,conf_info_inactive,new_act] = update_active_fro(conf_info,conf_info_inactive,[R_new;R_obs],actdist_fro);
                % if new frozen atoms were activated, update the GP model and reoptimize the hyperparameters
                if new_act > 0
                    fprintf('More frozen atoms activated. Now %g active and %g inactive frozen atoms.\n', size(conf_info.conf_fro,1), size(conf_info_inactive.conf_ifro,1));
                    gp.cf{2}.conf_info = conf_info;
                    range_x = max(max(dist_at(R_all,R_all,conf_info,1)));
                    gp.cf{2}.p.lengthScale.s2 = max(1,(range_x/3)^2);
                    if bigiter+1 <= num_bigiter_initparam || conf_info.n_pt > size(gp.cf{2}.lengthScale,2)
                        gp.cf{2}.lengthScale = repmat(norminv(0.75,0,range_x/3),1,conf_info.n_pt);
                    end
                    R_all2 = [repmat(R_all,D+1,1),reshape(repmat(0:D,size(R_all,1),1),[],1)];
                    gp = gp_optim(gp,R_all2,[E_all;G_all(:)],'opt',opt,'optimf',optimf);
                    [~, C] = gp_trcov(gp, R_all2);
                    L = chol(C,'lower');
                    a = L'\(L\[E_all;G_all(:)]);
                    pot_gp = @(R) potential_gp(R,gp,R_all2,a);
                    rotinfo.F_rot_old = 0;
                    rotinfo.F_modrot_old = 0;
                    rotinfo.orient_rot_oldplane = 0;
                    rotinfo.cgiter_rot = 0;
                    transinfo.potential = pot_gp;
                    transinfo.F_trans_old = 0;
                    transinfo.F_modtrans_old = 0;
                    transinfo.V_old = 0;
                    transinfo.zeroV = 1;
                    transinfo.cgiter_trans = 0;
                    transinfo.deltaR_mem = [];
                    transinfo.deltaF_mem = [];
                end
            end

            % limit the move if any atom-wise step length is larger than 99 % of 0.5*(1-'ratio_at_limit') times the minimum inter-atomic distance
            steplength = sqrt(sum((R_new-R).^2,2));
            steplength_atomwise = sqrt((R_new(1,1:3:end)-R(1,1:3:end)).^2+(R_new(1,2:3:end)-R(1,2:3:end)).^2+(R_new(1,3:3:end)-R(1,3:3:end)).^2); % atom-wise step lengths
            steplength_atomwise_limit = 0.5*(1-ratio_at_limit)*mindist_interatomic(R,conf_info);
            if any(steplength_atomwise > 0.99*steplength_atomwise_limit)
                step_coeff = 0.99*min(steplength_atomwise_limit./steplength_atomwise);
                fprintf('Warning: the step length of inner iteration %g limited.\n', iter)
                R_new = R + step_coeff*(R_new-R);
                transinfo.F_trans_old = 0;
                transinfo.F_modtrans_old = 0;
                transinfo.V_old = 0;
                transinfo.zeroV = 1;
                transinfo.cgiter_trans = 0;
                transinfo.deltaR_mem = [];
                transinfo.deltaF_mem = [];
            end

            % STOPPING CRITERION FOR INTER-ATOMIC DISTANCES
            % reject the step and stop the relaxation phase if the following does not hold:
            % there is an observed data point so that all inter-atomic distances of the current image are more than 'ratio_at_limit'
            % (by default 2/3) but less than 1/'ratio_at_limit' (3/2) times the corresponding inter-atomic distance of the observed data point,
            % i.e., |log(r_im/r_nearobs)| < |log(ratio_at_limit)| ( = |log(2/3)| = 0.4055 )
            if iter > 0
                disp1D_nearest = min(dist_max1Dlog(R_new,R_all,conf_info));
                if disp1D_nearest > abs(log(ratio_at_limit))
                    fprintf('Stopped the relaxation phase after %g inner iterations: inter-atomic distance changes too much compared to "nearest" observed data point.\n', iter)
                    num_es1 = num_es1 + 1;
                    break;
                end
            end

            % STOPPING CRITERION FOR JOINT MOVEMENT OF ATOMS
            % reject the step and stop the relaxation phase if there does not exist
            % an observed data point that fulfils the following requirement:
            % for all moving atoms, the change in the position of the atom between the current image and
            % the observed data point is not more than 1/2 of the distance from the atom to its nearest
            % neighbour atom in the current image or the observed data point           
            %if iter > 0
            %    dispmaxrel_nearest = min(dist_maxrel_atomwise3(R_new,R_all,conf_info));
            %    if dispmaxrel_nearest > 1/2
            %        fprintf('Stopped the relaxation phase after %g inner iterations: atom position changes too much compared to "nearest" observed data point.\n', iter)
            %        num_es2 = num_es2 + 1;
            %        break;
            %    end
            %end
            
            %{
            % THE OLD STOPPING CRITERION FOR RAW DISPLACEMENT
            % reject the step and stop the relaxation phase if the distance from the middle point of the current dimer to the
            % nearest observed data point is larger than 'disp_max'
            if iter > 0
                disp_nearest = sqrt(min(sum((repmat(R_new(1,:),size(R_all,1),1)-R_all).^2,2)));
                if disp_nearest > disp_max
                    fprintf('Stopped the relaxation phase after %g inner iterations: dimer too far from the nearest observed data point.\n', iter)
                    num_es2 = num_es2 + 1;
                    break;
                end
            end
            %}
            
            % otherwise accept the step and continue the relaxation
            R = R_new;
            
        end % END OF INNER ITERATION LOOP
        
        % acquire the accurate energy and gradient at the middle point of the dimer and add them to the data
        [E_R,G_R] = pot_biased(R);
        R_all = [R_all;R];
        E_all = [E_all;E_R];
        G_all = [G_all;G_R];
        
        %% in case of 2D space, plot the relaxed dimer
        %if D == 2
        %    plot([R(1,1)-dimer_sep*orient(1,1);R(1,1)+dimer_sep*orient(1,1)],[R(1,2)-dimer_sep*orient(1,2);R(1,2)+dimer_sep*orient(1,2)],'y-') 
        %    if not_relaxed > 0
        %        title(['Approximated energy surface on round ',num2str(bigiter),', dimer relaxation stopped early']);
        %    else
        %        title(['Approximated energy surface on round ',num2str(bigiter),', relaxed dimer']);
        %    end
        %end
        
        E_R_acc = [E_R_acc,E_R];
        maxF_R_acc = [maxF_R_acc,max(abs(G_R))];
        obs_at = [obs_at;size(E_R_gp,2)-1];
        fprintf('Accurate values: E_R = %1.3g, maxF_R = %1.3g \n\n',E_R_acc(:,end),maxF_R_acc(:,end));

        % stop the algorithm if final convergence is obtained
        if maxF_R_acc(:,end) < T_dimer
            fprintf('Final convergence obtained after %g relaxation phases (total number of image evaluations: %g).\n', bigiter, size(E_all,1)-N_obs_init);
            break;
        end

        % stop the algorithm if maximum number of outer iterations is reached
        if bigiter == num_bigiter
            fprintf('Stopped the algorithm: Maximum number of outer iterations (%g) reached.\n', bigiter);
            break;
        end
    
        % evaluate image 1 of the dimer if option 'eval_image1' is set on
        if eval_image1 > 0
            R1 = R + dimer_sep*orient;
            [E1,G1] = pot_biased(R1);
            R_all = [R_all;R1];
            E_all = [E_all;E1];
            G_all = [G_all;G1];
        end
        
    end % END OF OUTER ITERATION LOOP
    
    obs_total = size(E_all,1)-N_obs_init;
        
end
