%%%%% gpcf_matern32at.m
%%%%% Copyright: Olli-Pekka Koistinen, Aalto University, 11.7.2018
%%%%% 
%%%%% This function is a special GPstuff covariance function for atomic
%%%%% configurations. It is a version of 'gpcf_matern32.m' where the
%%%%% distance between configurations C and C' is based on the changes of
%%%%% the inter-atomic distances:
%%%%%
%%%%% dist(C,C') = sqrt(SUM_ij{[(1/r_ij-1/r_ij')/l_ij]^2}), where r_ij and
%%%%% r_ij' are the distances between atoms i and j in configurations C and
%%%%% C', respectively, and l_ij is the lengthscale of the corresponding
%%%%% atom pair type.
%%%%%
%%%%% The input vectors x are assumed to be row vectors including the
%%%%% coordinates of the moving atoms: [x_1,y_1,z_1,x_2,y_2,z_2,...].
%%%%%
%%%%% The parameter 'conf_info' is a structure array including necessary information about the configurations:
%%%%% conf_info.conf_fro: coordinates of active frozen atoms (N_fro x 3)
%%%%% conf_info.atomtype_mov: atomtype indices for moving atoms (1 x N_mov)
%%%%% conf_info.atomtype_fro: atomtype indices for active frozen atoms (1 x N_fro)
%%%%% Atomtypes must be indexed as 1,2,...,n_at (may include also inactive atomtypes).
%%%%% conf_info.pairtype: pairtype indices for pairs of atomtypes (n_at x n_at)
%%%%% conf_info.n_pt: number of active pairtypes
%%%%% Active pairtypes are indexed as 1,2,...,n_pt. Inactive pairtypes are given index 0.

function gpcf = gpcf_matern32at(varargin)
%GPCF_MATERN32AT  Create a Matern nu=3/2 covariance function for atomic
%                 configurations
%
%  Description
%    GPCF = GPCF_MATERN32AT('PARAM1',VALUE1,'PARAM2,VALUE2,...) 
%    creates Matern nu=3/2 covariance function structure in which
%    the named parameters have the specified values. Any
%    unspecified parameters are set to default values.
%
%    GPCF = GPCF_MATERN32AT(GPCF,'PARAM1',VALUE1,'PARAM2,VALUE2,...) 
%    modify a covariance function structure with the named
%    parameters altered with the specified values.
%
%    Parameters for Matern nu=3/2 covariance function [default]
%      magnSigma2        - magnitude (squared) [0.1]
%      lengthScale       - length scale [1]
%                          This can be either a scalar, which the
%                          covariance subfunctions replicate for all
%                          active pair types, or a vector that is
%                          indexed by the pair type index.
%      magnSigma2_prior  - prior for magnSigma2  [prior_logunif]
%      lengthScale_prior - prior for lengthScale [prior_t]
%      metric            - metric structure used by the covariance function []
%      selectedVariables - vector defining which inputs are used [all]
%                          selectedVariables is shorthand for using
%                          metric_euclidean with corresponding components
%      conf_info         - structure with the fields conf_fro,
%                          atomtype_mov, atomtype_fro, pairtype and n_pt
%                          read by the covariance subfunctions []
%
%    Note! If the prior is 'prior_fixed' then the parameter in
%    question is considered fixed and it is not handled in
%    optimization, grid integration, MCMC etc.
%
%  See also
%    GP_SET, GPCF_*, PRIOR_*, METRIC_*
%
% Copyright (c) 2007-2010 Jarno Vanhatalo
% Copyright (c) 2010 Aki Vehtari
% Copyright (c) 2014 Arno Solin and Jukka Koskenranta

% This software is distributed under the GNU General Public
% License (version 3 or later); please refer to the file
% License.txt, included with the software, for details.

  % inputParser checks the arguments and assigns some default values
  ip=inputParser;
  ip.FunctionName = 'GPCF_MATERN32AT';
  ip.addOptional('gpcf', [], @isstruct);
  ip.addParamValue('magnSigma2',0.1, @(x) isscalar(x) && x>0);
  ip.addParamValue('lengthScale',1, @(x) isvector(x) && all(x>0));
  ip.addParamValue('metric',[], @isstruct);
  ip.addParamValue('magnSigma2_prior', prior_logunif(), ...
                   @(x) isstruct(x) || isempty(x));
  ip.addParamValue('lengthScale_prior',prior_t(), ...
                   @(x) isstruct(x) || isempty(x));
  ip.addParamValue('selectedVariables',[], @(x) isempty(x) || ...
                   (isvector(x) && all(x>0)));
  ip.addParamValue('conf_info',[], @(x) isstruct(x));
  ip.parse(varargin{:});
  gpcf=ip.Results.gpcf;

  if isempty(gpcf)
    % No structure given: build a new one from scratch
    init=true;
    gpcf.type = 'gpcf_matern32at';
  else
    % A structure was given: it must carry the right type tag. The two
    % conditions are joined with || so that a missing 'type' field is
    % reported with this message (instead of failing on gpcf.type) and a
    % wrong type is actually rejected.
    if ~isfield(gpcf,'type') || ~isequal(gpcf.type,'gpcf_matern32at')
      error('First argument does not seem to be a valid covariance function structure')
    end
    init=false;
  end
  
  % Initialize parameters. When modifying an existing structure only the
  % explicitly given parameters are overwritten.
  if init || ~ismember('lengthScale',ip.UsingDefaults)
    gpcf.lengthScale = ip.Results.lengthScale;
  end
  if init || ~ismember('magnSigma2',ip.UsingDefaults)
    gpcf.magnSigma2 = ip.Results.magnSigma2;
  end
  if init || ~ismember('conf_info',ip.UsingDefaults)
    gpcf.conf_info = ip.Results.conf_info;
  end

  % Initialize prior structure
  if init
    gpcf.p=[];
  end
  if init || ~ismember('lengthScale_prior',ip.UsingDefaults)
    gpcf.p.lengthScale=ip.Results.lengthScale_prior;
  end
  if init || ~ismember('magnSigma2_prior',ip.UsingDefaults)
    gpcf.p.magnSigma2=ip.Results.magnSigma2_prior;
  end

  %Initialize metric
  if ~ismember('metric',ip.UsingDefaults)
    if ~isempty(ip.Results.metric)
      % A metric takes over the length scale and its prior
      gpcf.metric = ip.Results.metric;
      gpcf = rmfield(gpcf, 'lengthScale');
      gpcf.p = rmfield(gpcf.p, 'lengthScale');
    elseif isfield(gpcf,'metric')
      % An empty metric was given: remove the existing metric and move
      % its length scale (and prior) back to the covariance function
      if ~isfield(gpcf,'lengthScale')
        gpcf.lengthScale = gpcf.metric.lengthScale;
      end
      if ~isfield(gpcf.p,'lengthScale')
        gpcf.p.lengthScale = gpcf.metric.p.lengthScale;
      end
      gpcf = rmfield(gpcf, 'metric');
    end
  end
  
  % selectedVariables 
  if ~ismember('selectedVariables',ip.UsingDefaults)
    if ~isfield(gpcf,'metric')
      gpcf.selectedVariables = ip.Results.selectedVariables;
    elseif isfield(gpcf,'metric') 
      if ~isempty(ip.Results.selectedVariables)
        gpcf.metric=metric_euclidean(gpcf.metric,...
                                     'components',...
                                     num2cell(ip.Results.selectedVariables));
        if ~ismember('lengthScale',ip.UsingDefaults)
          gpcf.metric.lengthScale=ip.Results.lengthScale;
          gpcf = rmfield(gpcf, 'lengthScale');
        end
        if ~ismember('lengthScale_prior',ip.UsingDefaults)
          gpcf.metric.p.lengthScale=ip.Results.lengthScale_prior;
          gpcf.p = rmfield(gpcf.p, 'lengthScale');
        end
      else
        if ~isfield(gpcf,'lengthScale')
          gpcf.lengthScale = gpcf.metric.lengthScale;
        end
        if ~isfield(gpcf.p,'lengthScale')
          gpcf.p.lengthScale = gpcf.metric.p.lengthScale;
        end
        gpcf = rmfield(gpcf, 'metric');
      end
    end
  end
  
  if init
    % Set the function handles to the subfunctions
    gpcf.fh.pak = @gpcf_matern32at_pak;
    gpcf.fh.unpak = @gpcf_matern32at_unpak;
    gpcf.fh.lp = @gpcf_matern32at_lp;
    gpcf.fh.lpg= @gpcf_matern32at_lpg;
    gpcf.fh.cfg = @gpcf_matern32at_cfg;
    gpcf.fh.cfdg = @gpcf_matern32at_cfdg;
    gpcf.fh.cfdg2 = @gpcf_matern32at_cfdg2;
    gpcf.fh.ginput = @gpcf_matern32at_ginput;
    gpcf.fh.ginput2 = @gpcf_matern32at_ginput2;
    gpcf.fh.ginput3 = @gpcf_matern32at_ginput3;
    gpcf.fh.ginput4 = @gpcf_matern32at_ginput4;
    gpcf.fh.cov = @gpcf_matern32at_cov;
    gpcf.fh.trcov  = @gpcf_matern32at_trcov;
    gpcf.fh.trvar  = @gpcf_matern32at_trvar;
    gpcf.fh.recappend = @gpcf_matern32at_recappend;
    gpcf.fh.cf2ss = @gpcf_matern32at_cf2ss;
  end

end

function [w,s,h] = gpcf_matern32at_pak(gpcf)
%GPCF_MATERN32AT_PAK  Collect the covariance function parameters into
%                     one row vector
%
%  Description
%    [W, S, H] = GPCF_MATERN32AT_PAK(GPCF) reads the parameters of
%    the covariance function structure GPCF, log-transforms them
%    and concatenates them together with the values returned by
%    the pak functions of their priors:
%
%       w = [ log(gpcf.magnSigma2)
%             (hyperparameters of gpcf.magnSigma2)
%             log(gpcf.lengthScale(:))
%             (hyperparameters of gpcf.lengthScale)]'
%
%    S holds a text label for each group of entries and H a
%    hierarchy level for each entry (1 for the parameters, 1 plus
%    the level reported by the prior for the hyperparameters).
%    A parameter whose prior is empty is left out. When GPCF has
%    a metric, the pak function of the metric supplies the
%    length scale part of W and S (H is not extended in that case).
%
%  See also
%    GPCF_MATERN32AT_UNPAK

  w = [];
  s = {};
  h = [];

  % --- magnitude ---
  magn_prior = gpcf.p.magnSigma2;
  if ~isempty(magn_prior)
    w = [w log(gpcf.magnSigma2)];
    s = [s; 'log(matern32.magnSigma2)'];
    h = [h 1];
    % values, labels and levels of the prior hyperparameters
    [w_hyp, s_hyp, h_hyp] = magn_prior.fh.pak(magn_prior);
    s_hyp = strcat(repmat('prior-', size(s_hyp,1),1),s_hyp);
    w = [w w_hyp];
    s = [s; s_hyp];
    h = [h 1+h_hyp];
  end

  % --- length scale (delegated to the metric when one is present) ---
  if isfield(gpcf,'metric')
    [w_met, s_met] = gpcf.metric.fh.pak(gpcf.metric);
    w = [w w_met];
    s = [s; s_met];
  elseif ~isempty(gpcf.p.lengthScale)
    ls_prior = gpcf.p.lengthScale;
    n_ls = numel(gpcf.lengthScale);
    w = [w log(gpcf.lengthScale)];
    if n_ls > 1
      ls_label = sprintf('log(matern32.lengthScale x %d)',n_ls);
    else
      ls_label = 'log(matern32.lengthScale)';
    end
    s = [s; ls_label];
    h = [h ones(1,n_ls)];
    % values, labels and levels of the prior hyperparameters
    [w_hyp, s_hyp, h_hyp] = ls_prior.fh.pak(ls_prior);
    s_hyp = strcat(repmat('prior-', size(s_hyp,1),1),s_hyp);
    w = [w w_hyp];
    s = [s; s_hyp];
    h = [h 1+h_hyp];
  end

end

function [gpcf, w] = gpcf_matern32at_unpak(gpcf, w)
%GPCF_MATERN32AT_UNPAK  Write a parameter vector back into the
%                       covariance function structure
%
%  Description
%    [GPCF, W] = GPCF_MATERN32AT_UNPAK(GPCF, W) consumes entries
%    from the front of the parameter vector W, exponentiates
%    them and stores them in GPCF. The entries that were used
%    are removed and the remainder of W is returned.
%
%    The expected layout of W is the one produced by the pak
%    subfunction:
%       w = [ log(gpcf.magnSigma2)
%             (hyperparameters of gpcf.magnSigma2)
%             log(gpcf.lengthScale(:))
%             (hyperparameters of gpcf.lengthScale)]'
%
%    Parameters with an empty prior are skipped. When GPCF has a
%    metric, the unpak function of the metric is used in place of
%    the length scale part.
%
%  See also
%    GPCF_MATERN32AT_PAK

  % snapshot of the priors as they were on entry
  priors = gpcf.p;

  if ~isempty(priors.magnSigma2)
    gpcf.magnSigma2 = exp(w(1));
    w = w(2:end);
    % let the prior pick up its own hyperparameters
    magn_prior = gpcf.p.magnSigma2;
    [magn_prior, w] = magn_prior.fh.unpak(magn_prior, w);
    gpcf.p.magnSigma2 = magn_prior;
  end

  if isfield(gpcf,'metric')
    [updated_metric, w] = gpcf.metric.fh.unpak(gpcf.metric, w);
    gpcf.metric = updated_metric;
  elseif ~isempty(priors.lengthScale)
    n_ls = length(gpcf.lengthScale);
    gpcf.lengthScale = exp(w(1:n_ls));
    w = w(n_ls+1:end);
    % let the prior pick up its own hyperparameters
    ls_prior = gpcf.p.lengthScale;
    [ls_prior, w] = ls_prior.fh.unpak(ls_prior, w);
    gpcf.p.lengthScale = ls_prior;
  end

end

function lp = gpcf_matern32at_lp(gpcf)
%GPCF_MATERN32AT_LP  Log prior of the covariance function parameters
%
%  Description
%    LP = GPCF_MATERN32AT_LP(GPCF) sums the log prior densities of
%    the parameters of the covariance function structure GPCF.
%    Parameters with an empty prior contribute nothing. When GPCF
%    has a metric, the lp function of the metric is used in place
%    of the length scale term.
%
%  See also
%    GPCF_MATERN32AT_PAK, GPCF_MATERN32AT_UNPAK, GPCF_MATERN32AT_LPG

% The parameters are handled in log space, W = log(w), while the
% priors are defined for the untransformed w. The log of the Jacobian
% of w = exp(W), which is log(w), is therefore added to each term.
% See Gelman et al. (2013), Bayesian Data Analysis, third edition,
% p. 21.
  priors = gpcf.p;
  lp = 0;

  magn_prior = priors.magnSigma2;
  if ~isempty(magn_prior)
    lp = lp + magn_prior.fh.lp(gpcf.magnSigma2, magn_prior) ...
         + log(gpcf.magnSigma2);
  end

  if isfield(gpcf,'metric')
    lp = lp + gpcf.metric.fh.lp(gpcf.metric);
    return
  end

  ls_prior = priors.lengthScale;
  if ~isempty(ls_prior)
    lp = lp + ls_prior.fh.lp(gpcf.lengthScale, ls_prior) ...
         + sum(log(gpcf.lengthScale));
  end
end

function lpg = gpcf_matern32at_lpg(gpcf)
%GPCF_MATERN32AT_LPG  Gradient of the log prior with respect to the
%                     parameters
%
%  Description
%    LPG = GPCF_MATERN32AT_LPG(GPCF) returns the row vector
%    LPG = d log(p(th))/dth for the covariance function structure
%    GPCF. The entries follow the order used by the pak
%    subfunction. The gradients returned by the priors are
%    converted to the log-transformed parameters by multiplying
%    with the parameter value and adding 1 (the derivative of the
%    log Jacobian); the trailing entries returned by a prior are
%    appended unchanged. When GPCF has a metric, the lpg function
%    of the metric supplies the length scale part.
%
%  See also
%    GPCF_MATERN32AT_PAK, GPCF_MATERN32AT_UNPAK, GPCF_MATERN32AT_LP

  priors = gpcf.p;
  lpg = [];

  if ~isempty(gpcf.p.magnSigma2)
    g_magn = priors.magnSigma2.fh.lpg(gpcf.magnSigma2, priors.magnSigma2);
    lpg = [lpg g_magn(1).*gpcf.magnSigma2+1 g_magn(2:end)];
  end

  if isfield(gpcf,'metric')
    g_metric = gpcf.metric.fh.lpg(gpcf.metric);
    lpg = [lpg g_metric];
  elseif ~isempty(gpcf.p.lengthScale)
    n_ls = length(gpcf.lengthScale);
    g_ls = priors.lengthScale.fh.lpg(gpcf.lengthScale, priors.lengthScale);
    lpg = [lpg g_ls(1:n_ls).*gpcf.lengthScale+1 g_ls(n_ls+1:end)];
  end
end

function C = gpcf_matern32at_cov(gpcf, x1, x2)
%GPCF_MATERN32AT_COV  Evaluate covariance matrix between two sets of
%                     atomic configurations
%
%  Description
%    C = GPCF_MATERN32AT_COV(GPCF, X1, X2) takes a covariance
%    function structure GPCF and two matrices X1 and X2 whose rows
%    are configurations [x_1,y_1,z_1,x_2,y_2,z_2,...] of the
%    moving atoms. Returns the covariance matrix C, where element
%    ij is the covariance between row i of X1 and row j of X2.
%    Elements smaller than eps are set to zero.
%
%    C = GPCF_MATERN32AT_COV(GPCF, X1) and
%    C = GPCF_MATERN32AT_COV(GPCF, X1, []) use X2 = X1.
%
%    The squared distance between two configurations is the sum,
%    over the moving-moving and moving-frozen atom pairs, of
%    2*(1/r_ij - 1/r_ij')^2 / l^2, where l is the length scale of
%    the pair type of the pair.
%
%  See also
%    GPCF_MATERN32AT_TRCOV, GPCF_MATERN32AT_TRVAR, GP_COV, GP_TRCOV
  
    conf_fro = gpcf.conf_info.conf_fro; % coordinates of active frozen atoms (N_fro x 3)
    atomtype_mov = gpcf.conf_info.atomtype_mov; % atomtype indices for moving atoms (1 x N_mov)
    atomtype_fro = gpcf.conf_info.atomtype_fro; % atomtype indices for active frozen atoms (1 x N_fro)
    pairtype = gpcf.conf_info.pairtype; % pairtype indices for pairs of atomtypes (n_at x n_at)
    n_pt = gpcf.conf_info.n_pt; % number of active pairtypes
    
    % Default the second input set. The nargin test makes the
    % two-argument call valid instead of failing on an undefined x2.
    if nargin < 3 || isempty(x2)
        x2 = x1;
    end

    n1 = size(x1,1);
    n2 = size(x2,1);
    N_mov = size(atomtype_mov,2);
    N_fro = size(atomtype_fro,2);
    ma2 = gpcf.magnSigma2;
    
    % Inverse squared length scales; a scalar length scale is shared by
    % all pair types
    s2 = 1./gpcf.lengthScale.^2;
    if isscalar(s2)
        s2 = repmat(s2,1,n_pt);
    end

    % Accumulate the squared distance between the configurations
    dist=zeros(n1,n2);
    % distances between moving atoms
    if N_mov > 1
        for j = 1:N_mov-1
            for i = (j+1):N_mov
                invr_ij_1 = 1./sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                dist = dist + 2*s2(pairtype(atomtype_mov(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end
    % distances from moving atoms to frozen atoms
    if N_fro > 0
        for j = 1:N_mov
            for i = 1:N_fro
                invr_ij_1 = 1./sqrt(sum((bsxfun(@minus,x1(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((bsxfun(@minus,x2(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                dist = dist + 2*s2(pairtype(atomtype_fro(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end    

    % Matern nu=3/2 kernel of the (non-squared) distance
    dist = sqrt(dist);
    C = ma2.*(1+sqrt(3).*dist).*exp(-sqrt(3).*dist);
    C(C<eps)=0;
end

function C = gpcf_matern32at_trcov(gpcf, x)
%GPCF_MATERN32AT_TRCOV  Evaluate training covariance matrix of atomic
%                       configurations
%
%  Description
%    C = GPCF_MATERN32AT_TRCOV(GPCF, X) takes a covariance function
%    structure GPCF and a matrix X whose rows are configurations
%    [x_1,y_1,z_1,x_2,y_2,z_2,...] of the moving atoms. Returns
%    the covariance matrix C, where element ij is the covariance
%    between rows i and j of X. Elements smaller than eps are set
%    to zero.
%
%  See also
%    GPCF_MATERN32AT_COV, GPCF_MATERN32AT_TRVAR, GP_COV, GP_TRCOV

    info = gpcf.conf_info;
    fro_xyz = info.conf_fro;        % coordinates of active frozen atoms (N_fro x 3)
    type_mov = info.atomtype_mov;   % atomtype indices for moving atoms (1 x N_mov)
    type_fro = info.atomtype_fro;   % atomtype indices for active frozen atoms (1 x N_fro)
    pt_table = info.pairtype;       % pairtype indices for pairs of atomtypes (n_at x n_at)

    n_conf = size(x,1);
    n_mov = size(type_mov,2);
    n_fro = size(type_fro,2);

    % inverse squared length scale of every pair type; a scalar length
    % scale is shared by all of them
    inv_l2 = 1./gpcf.lengthScale.^2;
    if isscalar(inv_l2)
        inv_l2 = repmat(inv_l2,1,info.n_pt);
    end

    % squared distance between configurations, one atom pair at a time
    d2 = zeros(n_conf,n_conf);

    % pairs of moving atoms (a < b)
    for a = 1:n_mov-1
        cols_a = (a*3-2):(a*3);
        for b = (a+1):n_mov
            cols_b = (b*3-2):(b*3);
            inv_r = 1./sqrt(sum((x(:,cols_a)-x(:,cols_b)).^2,2));
            w_pair = 2*inv_l2(pt_table(type_mov(b),type_mov(a)));
            delta = bsxfun(@minus,inv_r,inv_r');
            d2 = d2 + w_pair.*delta.^2;
        end
    end

    % pairs of a moving atom and a frozen atom
    for a = 1:n_mov
        cols_a = (a*3-2):(a*3);
        for f = 1:n_fro
            inv_r = 1./sqrt(sum((bsxfun(@minus,x(:,cols_a),fro_xyz(f,1:3))).^2,2));
            w_pair = 2*inv_l2(pt_table(type_fro(f),type_mov(a)));
            delta = bsxfun(@minus,inv_r,inv_r');
            d2 = d2 + w_pair.*delta.^2;
        end
    end

    % Matern nu=3/2 kernel of the (non-squared) distance
    r = sqrt(d2);
    C = gpcf.magnSigma2.*(1+sqrt(3).*r).*exp(-sqrt(3).*r);
    C(C<eps) = 0;
end

function C = gpcf_matern32at_trvar(gpcf, x)
%GPCF_MATERN32AT_TRVAR  Evaluate training variance vector
%
%  Description
%    C = GPCF_MATERN32AT_TRVAR(GPCF, X) takes a covariance function
%    structure GPCF and a matrix X with one input per row.
%    Returns a column vector C with one element per row of X,
%    each equal to the magnitude GPCF.magnSigma2 (the covariance
%    of an input with itself). Elements smaller than eps are set
%    to zero.
%
%  See also
%    GPCF_MATERN32AT_COV, GP_COV, GP_TRCOV
  n_rows = size(x,1);

  % the variance does not depend on the input values
  C = ones(n_rows,1).*gpcf.magnSigma2;
  tiny = C<eps;
  C(tiny) = 0;
end

function DKff = gpcf_matern32at_cfg(gpcf, x1, x2, mask, i1)
%GPCF_MATERN32AT_CFG  Evaluate gradient of covariance function
%                     with respect to the parameters.
%
%  Description
%    DKff = GPCF_MATERN32AT_CFG(GPCF, X) takes a
%    covariance function structure GPCF, a matrix X of input
%    vectors and returns DKff, the gradients of covariance matrix
%    Kff = k(X,X) with respect to th (cell array with matrix
%    elements). This is a mandatory subfunction used for example 
%    in gradient computations.
%
%    DKff = GPCF_MATERN32AT_CFG(GPCF, X, X2) takes a
%    covariance function structure GPCF, a matrix X of input
%    vectors and returns DKff, the gradients of covariance matrix
%    Kff = k(X,X2) with respect to th (cell array with matrix
%    elements). This subfunction is needed when using sparse 
%    approximations (e.g. FIC).
%
%    DKff = GPCF_MATERN32AT_CFG(GPCF, X, [], MASK)
%    takes a covariance function structure GPCF, a matrix X
%    of input vectors and returns DKff, the diagonal of gradients
%    of covariance matrix Kff = k(X,X2) with respect to th (cell
%    array with matrix elements). This subfunction is needed when 
%    using sparse approximations (e.g. FIC).
%
%    DKff = GPCF_MATERN32AT_CFG(GPCF, X, X2, [], i) takes a
%    covariance function structure GPCF, a matrix X of input
%    vectors and returns DKff, the gradient of covariance matrix
%    Kff = k(X,X2) with respect to ith hyperparameter (matrix). 
%    5th input can also be used without X2. With i = 0 the number
%    of hyperparameters is returned instead. This subfunction is
%    needed when using memory save option in gp_set.
%
%    The parameters are handled in log space, so every returned
%    gradient is with respect to log(parameter). The order is
%    magnSigma2 first (if its prior is not empty), then the length
%    scale(s) (if their prior is not empty).
%
%  See also
%    GPCF_MATERN32AT_PAK, GPCF_MATERN32AT_UNPAK, GPCF_MATERN32AT_LP, GP_G
  
  conf_fro = gpcf.conf_info.conf_fro; % coordinates of active frozen atoms (N_fro x 3)
  atomtype_mov = gpcf.conf_info.atomtype_mov; % atomtype indices for moving atoms (1 x N_mov)
  atomtype_fro = gpcf.conf_info.atomtype_fro; % atomtype indices for active frozen atoms (1 x N_fro)
  pairtype = gpcf.conf_info.pairtype; % pairtype indices for pairs of atomtypes (n_at x n_at)
  n_pt = gpcf.conf_info.n_pt; % number of active pairtypes

  if nargin == 2 || isempty(x2)
      x2 = x1;
  end

  n1 = size(x1,1);
  n2 = size(x2,1);
  N_mov = size(atomtype_mov,2);
  N_fro = size(atomtype_fro,2);
  ma2 = gpcf.magnSigma2;

  DKff = {};
  ii1 = 0;

  if nargin==5
    % Use memory save option
    savememory=1;
    if i1==0
      i=0;
      % Return number of hyperparameters
      if ~isempty(gpcf.p.magnSigma2)
        i=1;
      end
      if ~isempty(gpcf.p.lengthScale)
        i=i+length(gpcf.lengthScale);
      end
      DKff=i;
      return
    end
  else
    savememory=0;
  end
  
  % Evaluate: DKff{1} = d Kff / d magnSigma2
  %           DKff{2} = d Kff / d lengthScale
  % NOTE! Here we have already taken into account that the parameters
  % are transformed through log() and thus dK/dlog(p) = p * dK/dp
  % evaluate the gradient for training covariance
 
  if nargin < 4 || isempty(mask)
    if size(x1,2) ~= size(x2,2)
      error('gpcf_matern32 -> _ghyper: The number of columns in x and x2 has to be the same. ')
    end

    s2 = 1./gpcf.lengthScale.^2;
    % If ARD is not used make s a vector of
    % equal elements
    if size(s2)==1
        s2 = repmat(s2,1,n_pt);
    end
    % NOTE: in this function dist holds the SQUARED distance between the
    % configurations (no square root is taken after the accumulation)
    dist=zeros(n1,n2);
    % distances between moving atoms
    if N_mov > 1
        for j = 1:N_mov-1
            for i = (j+1):N_mov
                invr_ij_1 = 1./sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                dist = dist + 2*s2(pairtype(atomtype_mov(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end
    % distances from moving atoms to frozen atoms
    if N_fro > 0
        for j = 1:N_mov
            for i = 1:N_fro
                invr_ij_1 = 1./sqrt(sum((bsxfun(@minus,x1(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((bsxfun(@minus,x2(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                dist = dist + 2*s2(pairtype(atomtype_fro(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end
    
    if ~isempty(gpcf.p.magnSigma2)
        % d K / d log(magnSigma2) = K
        K = ma2.*(1+sqrt(3.*dist)).*exp(-sqrt(3.*dist));
        K(K<eps)=0;
        ii1 = ii1+1;
        DKff{ii1} = K;
    end
    
    [n1, dim] = size(x1);
    if savememory
      if i1==1
        DKff=DKff{1};
        return
      else
        % Shift the indices so that i1 counts the length scales only and
        % the requested gradient ends up in DKff{1}
        ii1=ii1-1;
        i1=i1-1;
      end
    else
      i1=1:n_pt;
    end
      
    if ~isempty(gpcf.p.lengthScale)
      % Evaluate help matrix for calculations of derivatives with respect
      % to the lengthScale
      if length(gpcf.lengthScale) == 1
        % In the case of an isotropic matern32    
        % With r^2 = dist proportional to 1/l^2, d(dist)/dlog(l) = -2*dist
        % and dK/d(dist) = -3/2*ma2*exp(-sqrt(3*dist)), so the gradient is
        % +3*ma2*exp(-sqrt(3*dist)).*dist. This equals the sum of the ARD
        % gradients below, where dist_pt carries the minus sign.
        DK_l = 3.*ma2.*exp(-sqrt(3*dist)).*dist;
        ii1=ii1+1;
        DKff{ii1} = DK_l;
      else
        % In the case ARD is used       
        % dist_pt{pt} collects the NEGATIVE of the part of the squared
        % distance that depends on the length scale of pair type pt
        for pt = 1:n_pt
            dist_pt{pt} = zeros(n1,n2);
        end
        % distances between moving atoms
        if N_mov > 1
            for j = 1:N_mov-1
                for i = (j+1):N_mov
                    invr_ij_1 = 1./sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                    invr_ij_2 = 1./sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                    pt = pairtype(atomtype_mov(i),atomtype_mov(j));
                    dist_pt{pt} = dist_pt{pt} - 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
                end
            end
        end
        % distances from moving atoms to frozen atoms
        if N_fro > 0
            for j = 1:N_mov
                for i = 1:N_fro
                    invr_ij_1 = 1./sqrt(sum((bsxfun(@minus,x1(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                    invr_ij_2 = 1./sqrt(sum((bsxfun(@minus,x2(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                    pt = pairtype(atomtype_fro(i),atomtype_mov(j));
                    dist_pt{pt} = dist_pt{pt} - 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
                end
            end
        end
        for i_th = i1
          DK_l = -3.*ma2.*exp(-sqrt(3*dist)).*dist_pt{i_th};
          ii1=ii1+1;
          DKff{ii1} = DK_l;
        end
      end      
    end
  
    % Evaluate: DKff{1}    = d mask(Kff,I) / d magnSigma2
    %           DKff{2...} = d mask(Kff,I) / d lengthScale
  elseif nargin == 4 || nargin == 5
    ii1=0;   
    if ~isempty(gpcf.p.magnSigma2) && (~savememory || all(i1==1))
      ii1 = ii1+1;
      DKff{ii1} = gpcf.fh.trvar(gpcf, x1);   % d mask(Kff,I) / d magnSigma2
    end
    if ~isempty(gpcf.p.lengthScale)
      for i2=1:length(gpcf.lengthScale)
        ii1 = ii1+1;
        DKff{ii1}  = 0; % d mask(Kff,I) / d lengthScale
      end
    end
  end
  if savememory
    DKff=DKff{1};
  end
end


function DKff = gpcf_matern32at_cfdg(gpcf, x1, x2, dims)
%GPCF_MATERN32AT_CFDG  Evaluate gradient of covariance function, of
%                which has been taken partial derivative with
%                respect to x1, with respect to parameters.
%
%  Description
%    DKff = GPCF_MATERN32AT_CFDG(GPCF, X1, X2, DIMS) takes a
%    covariance function structure GPCF and matrices X1 and X2 of
%    input vectors and returns DKff, the gradients of derivatived
%    covariance matrix dK(df,f)/dhyp = d(d k(X1,X2)/dx1)/dhyp, with
%    respect to the parameters. X2 defaults to X1 when it is
%    omitted or empty.
%
%    Evaluate: DKff{1}      = d Kff / d magnSigma2
%              DKff{2:end}  = d Kff / d lengthScale
%    Each cell stacks, along the first dimension, one block per
%    element of DIMS. With a scalar length scale there is one length
%    scale cell, otherwise one per active pair type. This subfunction
%    is needed when using derivative observations.
%
%         dims - is a vector of input dimensions with respect to which the
%                derivatives of the covariance function have been calculated
%                [by default dims=1:size(x1,2)]
%
%
%    Note! When coding the derivatives of the covariance function, remember
%    to double check them. See gp_cov for lines of code to check the
%    matrices
%
%  See also
%    GPCF_MATERN32AT_GINPUT

if isfield(gpcf,'metric')
    error('Metric doesnt work with grad.obs')
end

conf_fro = gpcf.conf_info.conf_fro; % coordinates of active frozen atoms (N_fro x 3)
atomtype_mov = gpcf.conf_info.atomtype_mov; % atomtype indices for moving atoms (1 x N_mov)
atomtype_fro = gpcf.conf_info.atomtype_fro; % atomtype indices for active frozen atoms (1 x N_fro)
pairtype = gpcf.conf_info.pairtype; % pairtype indices for pairs of atomtypes (n_at x n_at)
n_pt = gpcf.conf_info.n_pt; % number of active pairtypes

% Resolve the optional inputs BEFORE any size is read from them, so that
% n2 matches the x2 that is actually used below
[n1,dim] = size(x1);
if nargin < 3 || isempty(x2)
    x2 = x1;
end
if nargin < 4 || isempty(dims)
    dims = 1:dim;
end
n2 = size(x2,1);
N_mov = size(atomtype_mov,2);
N_fro = size(atomtype_fro,2);
ma2 = gpcf.magnSigma2;

ii1=0;
DKff = {};

% grad with respect to MAGNSIGMA2
if ~isempty(gpcf.p.magnSigma2)
    Cdm = gpcf.fh.ginput4(gpcf, x1, x2, dims);
    DKffapu = cat(1,Cdm{1:end});
    ii1=ii1+1;
    DKff{ii1}=DKffapu;
end

% grad with respect to LENGTHSCALE
if ~isempty(gpcf.p.lengthScale)
    
    s2 = 1./gpcf.lengthScale.^2;
    % If ARD is not used make s a vector of
    % equal elements
    if size(s2)==1
        s2 = repmat(s2,1,n_pt);
        % treat every pair as the same pair type so that a single
        % gradient is returned for the shared length scale
        pairtype(:) = 1;
        n_pt = 1;
    end
    
    % dist accumulates the squared distance, dist_pt{pt} the NEGATIVE of
    % the part of it that belongs to pair type pt
    dist = zeros(n1,n2);
    for pt = 1:n_pt
        dist_pt{pt} = zeros(n1,n2);
        DK_pt{pt} = [];
    end
    % distances between moving atoms
    if N_mov > 1
        for j = 1:N_mov-1
            for i = (j+1):N_mov
                invr_ij_1 = 1./sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                pt = pairtype(atomtype_mov(i),atomtype_mov(j));
                dist = dist + 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
                dist_pt{pt} = dist_pt{pt} - 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end
    % distances from moving atoms to frozen atoms
    if N_fro > 0
        for j = 1:N_mov
            for i = 1:N_fro
                invr_ij_1 = 1./sqrt(sum((bsxfun(@minus,x1(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((bsxfun(@minus,x2(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                pt = pairtype(atomtype_fro(i),atomtype_mov(j));
                dist = dist + 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
                dist_pt{pt} = dist_pt{pt} - 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end
    dist = sqrt(dist);
    invdist = 1./dist;
    invdist(dist==0) = 0; % avoid Inf where the configurations coincide
    expdist = exp(-sqrt(3).*dist);
    
    for i_dim = dims
        % atom index and coordinate (1=x, 2=y, 3=z) of input dimension i_dim
        i = ceil(i_dim/3);
        xyz = i_dim-(i-1)*3;
        % D1 accumulates the derivative of the squared distance with
        % respect to x1(:,i_dim); D1_pt{pt} the negative of its pair type
        % pt part
        D1 = zeros(n1,n2);
        for pt = 1:n_pt
            D1_pt{pt} = zeros(n1,n2);
        end
        if N_mov > 1
            for j = 1:N_mov
                if i ~= j
                    r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                    r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                    pt = pairtype(atomtype_mov(i),atomtype_mov(j));
                    deriv_ij = -4*s2(pt).*bsxfun(@minus,1./r_ij_1,1./r_ij_2');
                    deriv_ij = bsxfun(@times,deriv_ij,(x1(:,(i-1)*3+xyz)-x1(:,(j-1)*3+xyz))./r_ij_1.^3);
                    D1 = D1 + deriv_ij;
                    D1_pt{pt} = D1_pt{pt} - deriv_ij;
                end
            end
        end
        if N_fro > 0
            for j = 1:N_fro
                r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
                r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
                pt = pairtype(atomtype_mov(i),atomtype_fro(j));
                deriv_ij = -4*s2(pt).*bsxfun(@minus,1./r_ij_1,1./r_ij_2');
                deriv_ij = bsxfun(@times,deriv_ij,(bsxfun(@minus,x1(:,(i-1)*3+xyz),conf_fro(j,xyz)))./r_ij_1.^3);
                D1 = D1 + deriv_ij;
                D1_pt{pt} = D1_pt{pt} - deriv_ij;
            end
        end
        % stack the block of this input dimension under the previous ones
        for pt = 1:n_pt
            DK = -3.*ma2.*expdist.*D1_pt{pt} + 3/2*sqrt(3)*ma2*invdist.*expdist.*D1.*dist_pt{pt};
            DK_pt{pt} = cat(1,DK_pt{pt},DK);
        end
    end
    for pt = 1:n_pt
        ii1 = ii1+1;
        DKff{ii1} = DK_pt{pt};
    end
    
end
end

function DKff = gpcf_matern32at_cfdg2(gpcf, x1, x2, dims1, dims2)
%GPCF_MATERN32AT_CFDG2  Evaluate the gradient, with respect to the
%                       hyperparameters, of the covariance function
%                       that has been differentiated with respect to
%                       both input variables x1 and x2.
%
%  Description
%    DKff = GPCF_MATERN32AT_CFDG2(GPCF, X1, X2, DIMS1, DIMS2) takes a
%    covariance function structure GPCF, matrices X1 (n1 x dim) and X2
%    (n2 x dim) of input row vectors and two input dimensions DIMS1 and
%    DIMS2, and returns the cell array DKff of gradients of
%    d^2 k(X1,X2)/(dX1_DIMS1 dX2_DIMS2) with respect to the
%    hyperparameters. If X2 is empty, X1 is used in its place.
%
%    The cells are filled in this order:
%      - one cell for magnSigma2, if GPCF.p.magnSigma2 is non-empty
%        (this is the derivative covariance matrix itself, obtained from
%        GPCF.fh.ginput2 when DIMS1 == DIMS2, otherwise GPCF.fh.ginput3)
%      - one cell per lengthscale, if GPCF.p.lengthScale is non-empty:
%        n_pt cells (one per active pair type), or a single cell when
%        the lengthscale is a scalar.
%    NOTE(review): the expressions correspond to derivatives with
%    respect to the log-transformed parameters (the usual GPstuff
%    convention) - confirm against the pak/unpak functions.
%
%    DIMS1 and DIMS2 are required and are assumed to be scalars.
%    This subfunction is needed when using derivative observations.
%
%    Note! When coding the derivatives of the covariance function, remember
%    to double check them. See gp_cov for lines of code to check the
%    matrices
%
%  See also
%   GPCF_MATERN32AT_GINPUT, GPCF_MATERN32AT_GINPUT2, GPCF_MATERN32AT_GINPUT3

if isfield(gpcf,'metric')
    error('Metric doesnt work with grad.obs')
end

% Resolve the argument defaults BEFORE any size is derived from them;
% otherwise an empty x2 would give n2 = 0 and wrongly sized matrices.
if nargin < 3 || isempty(x2)
    x2 = x1;
end
if nargin < 4 || isempty(dims1)
    %dims1 = 1:m;
    error('dims1 needs to be given')
end
if nargin < 5 || isempty(dims2)
    %dims2 = 1:m;
    error('dims2 needs to be given')
end

conf_fro = gpcf.conf_info.conf_fro; % coordinates of active frozen atoms (N_fro x 3)
atomtype_mov = gpcf.conf_info.atomtype_mov; % atomtype indices for moving atoms (1 x N_mov)
atomtype_fro = gpcf.conf_info.atomtype_fro; % atomtype indices for active frozen atoms (1 x N_fro)
pairtype = gpcf.conf_info.pairtype; % pairtype indices for pairs of atomtypes (n_at x n_at)
n_pt = gpcf.conf_info.n_pt; % number of active pairtypes

n1 = size(x1,1);
n2 = size(x2,1);
N_mov = size(atomtype_mov,2);
N_fro = size(atomtype_fro,2);
ma2 = gpcf.magnSigma2;

% NOTICE. AS OF NOW we assume that dims1 and dims2 are scalars

DKff = {};
ii1=0;
% Derivative covariance matrix itself (needed for the magnSigma2 gradient)
if dims1 == dims2
    DKdd = gpcf.fh.ginput2(gpcf, x1, x2, dims1);
else
    DKdd = gpcf.fh.ginput3(gpcf, x1, x2, dims1, dims2);
end

% grad with respect to MAGNSIGMA2
if ~isempty(gpcf.p.magnSigma2)
    ii1 = ii1 + 1;
    DKff{ii1} = DKdd{1};
end

% grad with respect to LENGTHSCALE
if ~isempty(gpcf.p.lengthScale)
    
    s2 = 1./gpcf.lengthScale.^2;
    % If a single common lengthscale is used, treat all atom pairs as
    % one pair type so that exactly one gradient is returned
    if numel(s2)==1
        s2 = repmat(s2,1,n_pt);
        pairtype(:) = 1;
        n_pt = 1;
    end
    
    % dist accumulates the squared distance; dist_pt{pt} accumulates the
    % NEGATED contribution of pair type pt to the squared distance
    dist = zeros(n1,n2);
    dist_pt = cell(1,n_pt);
    for pt = 1:n_pt
        dist_pt{pt} = zeros(n1,n2);
    end
    % distances between moving atoms
    if N_mov > 1
        for j = 1:N_mov-1
            for i = (j+1):N_mov
                invr_ij_1 = 1./sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                pt = pairtype(atomtype_mov(i),atomtype_mov(j));
                dist = dist + 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
                dist_pt{pt} = dist_pt{pt} - 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end
    % distances from moving atoms to frozen atoms
    if N_fro > 0
        for j = 1:N_mov
            for i = 1:N_fro
                invr_ij_1 = 1./sqrt(sum((bsxfun(@minus,x1(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((bsxfun(@minus,x2(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                pt = pairtype(atomtype_fro(i),atomtype_mov(j));
                dist = dist + 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
                dist_pt{pt} = dist_pt{pt} - 2*s2(pt).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end
    dist = sqrt(dist);
    invdist = 1./dist;
    invdist(dist==0) = 0; % avoid Inf/NaN where the configurations coincide
    expdist = exp(-sqrt(3).*dist);
    
    % Split the input dimensions into (moving atom index, x/y/z index)
    i_1 = ceil(dims1/3);
    xyz_1 = dims1-(i_1-1)*3;
    i_2 = ceil(dims2/3);
    xyz_2 = dims2-(i_2-1)*3;
    % D1, D2, D12: first derivatives of the squared distance w.r.t.
    % x1(:,dims1) and x2(:,dims2), and the mixed second derivative.
    % The *_pt cells hold the negated contributions per pair type.
    D1 = zeros(n1,n2);
    D2 = zeros(n1,n2);
    D12 = zeros(n1,n2);
    D1_pt = cell(1,n_pt);
    D2_pt = cell(1,n_pt);
    D12_pt = cell(1,n_pt);
    for pt = 1:n_pt
      D1_pt{pt} = zeros(n1,n2);
      D2_pt{pt} = zeros(n1,n2);
      D12_pt{pt} = zeros(n1,n2);
    end
    if i_1 ~= i_2
        % --- the two dimensions belong to different moving atoms ---
        % derivative w.r.t. x1(:,dims1): all pairs involving atom i_1
        i = i_1;
        if N_mov > 1
          for j = 1:N_mov
            if i ~= j
              r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
              r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
              pt = pairtype(atomtype_mov(i),atomtype_mov(j));
              temp = -4*s2(pt);
              temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
              temp_1 = (x1(:,(i-1)*3+xyz_1)-x1(:,(j-1)*3+xyz_1))./r_ij_1.^3;
              deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
              D1 = D1 + deriv_ij_1;
              D1_pt{pt} = D1_pt{pt} - deriv_ij_1;
            end
          end
        end
        if N_fro > 0
          for j = 1:N_fro
            r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
            r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
            pt = pairtype(atomtype_mov(i),atomtype_fro(j));
            temp = -4*s2(pt);
            temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
            temp_1 = bsxfun(@minus,x1(:,(i-1)*3+xyz_1),conf_fro(j,xyz_1))./(r_ij_1.^3);
            deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
            D1 = D1 + deriv_ij_1;
            D1_pt{pt} = D1_pt{pt} - deriv_ij_1;
          end
        end
        % derivative w.r.t. x2(:,dims2): all pairs involving atom i_2
        i = i_2;
        if N_mov > 1
          for j = 1:N_mov
            if i ~= j
              r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
              r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
              pt = pairtype(atomtype_mov(i),atomtype_mov(j));
              temp = -4*s2(pt);
              temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
              temp_2 = (x2(:,(i-1)*3+xyz_2)-x2(:,(j-1)*3+xyz_2))./r_ij_2.^3;
              deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
              D2 = D2 + deriv_ij_2;
              D2_pt{pt} = D2_pt{pt} - deriv_ij_2;
            end
          end
        end
        if N_fro > 0
          for j = 1:N_fro
            r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
            r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
            pt = pairtype(atomtype_mov(i),atomtype_fro(j));
            temp = -4*s2(pt);
            temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
            temp_2 = bsxfun(@minus,x2(:,(i-1)*3+xyz_2),conf_fro(j,xyz_2))./(r_ij_2.^3);
            deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
            D2 = D2 + deriv_ij_2;
            D2_pt{pt} = D2_pt{pt} - deriv_ij_2;
          end
        end
        % mixed derivative: only the pair (i_1,i_2) depends on both
        % coordinates
        i = i_1;
        j = i_2;
        r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
        r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
        pt = pairtype(atomtype_mov(i),atomtype_mov(j));
        temp = -4*s2(pt);
        temp_1 = (x1(:,(i-1)*3+xyz_1)-x1(:,(j-1)*3+xyz_1))./r_ij_1.^3;
        temp_2 = (x2(:,(j-1)*3+xyz_2)-x2(:,(i-1)*3+xyz_2))./r_ij_2.^3;
        deriv_ij_12 = bsxfun(@times,temp*temp_1,temp_2');
        D12 = D12 + deriv_ij_12;
        D12_pt{pt} = D12_pt{pt} - deriv_ij_12;
    else
        % --- both dimensions belong to the same moving atom: every pair
        % involving that atom contributes to D1, D2 and D12 ---
        i = i_1;
        if N_mov > 1
          for j = 1:N_mov
            if i ~= j
              r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
              r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
              pt = pairtype(atomtype_mov(i),atomtype_mov(j));
              temp = -4*s2(pt);
              temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
              temp_1 = (x1(:,(i-1)*3+xyz_1)-x1(:,(j-1)*3+xyz_1))./r_ij_1.^3;
              temp_2 = (x2(:,(i-1)*3+xyz_2)-x2(:,(j-1)*3+xyz_2))./r_ij_2.^3;
              deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
              deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
              deriv_ij_12 = bsxfun(@times,temp*temp_1,temp_2');
              D1 = D1 + deriv_ij_1;
              D2 = D2 + deriv_ij_2;
              D12 = D12 + deriv_ij_12;
              D1_pt{pt} = D1_pt{pt} - deriv_ij_1;
              D2_pt{pt} = D2_pt{pt} - deriv_ij_2;
              D12_pt{pt} = D12_pt{pt} - deriv_ij_12;
            end
          end
        end
        if N_fro > 0
          for j = 1:N_fro
            r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
            r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
            pt = pairtype(atomtype_mov(i),atomtype_fro(j));
            temp = -4*s2(pt);
            temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
            temp_1 = bsxfun(@minus,x1(:,(i-1)*3+xyz_1),conf_fro(j,xyz_1))./(r_ij_1.^3);
            temp_2 = bsxfun(@minus,x2(:,(i-1)*3+xyz_2),conf_fro(j,xyz_2))./(r_ij_2.^3);
            deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
            deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
            deriv_ij_12 = bsxfun(@times,temp*temp_1,temp_2');
            D1 = D1 + deriv_ij_1;
            D2 = D2 + deriv_ij_2;
            D12 = D12 + deriv_ij_12;
            D1_pt{pt} = D1_pt{pt} - deriv_ij_1;
            D2_pt{pt} = D2_pt{pt} - deriv_ij_2;
            D12_pt{pt} = D12_pt{pt} - deriv_ij_12;
          end
        end
    end
    % Assemble one gradient matrix per pair type lengthscale
    for pt = 1:n_pt
        DK = -3.*ma2.*expdist.*D12_pt{pt} - 3/4*sqrt(3)*ma2*invdist.^2.*expdist.*(invdist+sqrt(3)).*D1.*D2.*dist_pt{pt};
        DK = DK + 3/2*sqrt(3)*ma2*invdist.*expdist.*(D1.*D2_pt{pt}+D2.*D1_pt{pt}+dist_pt{pt}.*D12);
        ii1 = ii1+1;
        DKff{ii1} = DK;
    end
end

end


function DKff = gpcf_matern32at_ginput(gpcf, x1, x2, i1)
%GPCF_MATERN32AT_GINPUT  Evaluate gradient of covariance function with
%                        respect to x.
%
%  Description
%    DKff = GPCF_MATERN32AT_GINPUT(GPCF, X) takes a covariance
%    function structure GPCF and a matrix X of input row vectors
%    and returns DKff, the gradients of the covariance matrix
%    k(X,X2) with X2 = X, with respect to the elements of X.
%
%    DKff = GPCF_MATERN32AT_GINPUT(GPCF, X, X2) does the same for
%    the covariance matrix k(X,X2). An empty X2 is replaced by X.
%
%    DKff = GPCF_MATERN32AT_GINPUT(GPCF, X, X2, i) restricts the
%    gradients to the input dimension(s) i (memory save option).
%    With i == 0 the number of input dimensions is returned instead.
%
%    DKff is a cell array ordered observation by observation and,
%    within each observation, dimension by dimension. Each cell is
%    an n1 x n2 matrix that is non-zero only on the row of the
%    observation being differentiated.
%
%  See also
%    GPCF_MATERN32AT_GINPUT2, GPCF_MATERN32AT_GINPUT3

    conf_fro = gpcf.conf_info.conf_fro; % coordinates of active frozen atoms (N_fro x 3)
    atomtype_mov = gpcf.conf_info.atomtype_mov; % atomtype indices for moving atoms (1 x N_mov)
    atomtype_fro = gpcf.conf_info.atomtype_fro; % atomtype indices for active frozen atoms (1 x N_fro)
    pairtype = gpcf.conf_info.pairtype; % pairtype indices for pairs of atomtypes (n_at x n_at)
    n_pt = gpcf.conf_info.n_pt; % number of active pairtypes

    if nargin == 2 || isempty(x2)
        x2 = x1;
    end

    [n1,dim] = size(x1);
    n2 = size(x2,1);
    N_mov = size(atomtype_mov,2);
    N_fro = size(atomtype_fro,2);
    ma2 = gpcf.magnSigma2;
    
    s2 = 1./gpcf.lengthScale.^2;
    % If a single common lengthscale is used, replicate it for every
    % pair type
    if length(s2)==1
        s2 = repmat(s2,1,n_pt);
    end
    
    if nargin==4
      % Use memory save option
      savememory=1;
      if i1==0
        % Return number of covariates
        DKff=dim;
        return
      end
    else
      savememory=0;
    end

    % Squared distance between the configurations (the square root is
    % taken inside the exponential below)
    dist=zeros(n1,n2);
    % distances between moving atoms
    if N_mov > 1
        for j = 1:N_mov-1
            for i = (j+1):N_mov
                invr_ij_1 = 1./sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                dist = dist + 2*s2(pairtype(atomtype_mov(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end
    % distances from moving atoms to frozen atoms
    if N_fro > 0
        for j = 1:N_mov
            for i = 1:N_fro
                invr_ij_1 = 1./sqrt(sum((bsxfun(@minus,x1(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((bsxfun(@minus,x2(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                dist = dist + 2*s2(pairtype(atomtype_fro(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
            end
        end
    end
    % Loop-invariant factor exp(-sqrt(3)*distance): computed once here
    % instead of once per (observation, dimension) pair
    expdist = exp(-sqrt(3.*dist));
    
    if ~savememory
      i1=1:dim;
    end
    DKff = {}; % defined even if there are no observations/dimensions
    ii1 = 0;
    for obs1 = 1:n1
      for i_dim = i1
        % Split the input dimension into (moving atom index, x/y/z index)
        i = ceil(i_dim/3);
        xyz = i_dim-(i-1)*3;
        % D1: derivative of the squared distance w.r.t. x1(obs1,i_dim);
        % only row obs1 depends on that coordinate
        D1 = zeros(n1,n2);
        if N_mov > 1
            for j = 1:N_mov
              if i ~= j
                  r_ij_1 = sqrt(sum((x1(obs1,(j*3-2):(j*3))-x1(obs1,(i*3-2):(i*3))).^2,2));
                  r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                  deriv_ij = -4*s2(pairtype(atomtype_mov(i),atomtype_mov(j))).*bsxfun(@minus,1./r_ij_1,1./r_ij_2');
                  deriv_ij = bsxfun(@rdivide,deriv_ij.*(x1(obs1,(i-1)*3+xyz)-x1(obs1,(j-1)*3+xyz)),r_ij_1.^3);
                  D1(obs1,:) = D1(obs1,:) + deriv_ij;
              end
            end
        end
        if N_fro > 0
            for j = 1:N_fro
              r_ij_1 = sqrt(sum((conf_fro(j,1:3)-x1(obs1,(i*3-2):(i*3))).^2,2));
              r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
              deriv_ij = -4*s2(pairtype(atomtype_mov(i),atomtype_fro(j))).*bsxfun(@minus,1./r_ij_1,1./r_ij_2');
              deriv_ij = bsxfun(@rdivide,deriv_ij.*(x1(obs1,(i-1)*3+xyz)-conf_fro(j,xyz)),r_ij_1.^3);
              D1(obs1,:) = D1(obs1,:) + deriv_ij;
            end
        end
        DK = -3/2.*ma2.*expdist.*D1;
        ii1 = ii1 + 1;
        DKff{ii1} = DK;
      end
    end
end

function DKff = gpcf_matern32at_ginput2(gpcf, x1, x2, dims, takeOnlyDiag)
%GPCF_MATERN32AT_GINPUT2  Evaluate gradient of covariance function with
%                   respect to both input variables x1 and x2 (in
%                   same dimension).
%
%  Description
%    DKff = GPCF_MATERN32AT_GINPUT2(GPCF, X1, X2) takes a covariance
%    function structure GPCF and matrices X1 (n1 x dim) and X2
%    (n2 x dim) of input row vectors and returns DKff, a cell array
%    with one n1 x n2 matrix d^2 k(X1,X2)/(dX1_d dX2_d) per input
%    dimension d. An empty X2 is replaced by X1.
%
%    DKff = GPCF_MATERN32AT_GINPUT2(GPCF, X1, X2, DIMS) evaluates only
%    the input dimensions listed in DIMS (default 1:dim).
%
%    DKff = GPCF_MATERN32AT_GINPUT2(GPCF, X1, X2, DIMS, 'takeOnlyDiag')
%    returns n1 x 1 vectors computed from X1 only (X2 is not used in
%    this mode; presumably the diagonal of the X1-versus-X1 matrix).
%
%    This subfunction is needed when using derivative observations.
%
%    Note! When coding the derivatives of the covariance function, remember
%    to double check them. See gp_cov for lines of code to check the
%    matrices
%
%  See also
%    GPCF_MATERN32AT_GINPUT, GPCF_MATERN32AT_GINPUT3, GPCF_MATERN32AT_CFDG2

  % Validate the argument count before x2 is touched; previously this
  % check came after size(x2,1) and could never be reached.
  if nargin < 3
    error('Needs at least 3 input arguments')
  end
  if isempty(x2)
      x2 = x1;
  end

  conf_fro = gpcf.conf_info.conf_fro; % coordinates of active frozen atoms (N_fro x 3)
  atomtype_mov = gpcf.conf_info.atomtype_mov; % atomtype indices for moving atoms (1 x N_mov)
  atomtype_fro = gpcf.conf_info.atomtype_fro; % atomtype indices for active frozen atoms (1 x N_fro)
  pairtype = gpcf.conf_info.pairtype; % pairtype indices for pairs of atomtypes (n_at x n_at)
  n_pt = gpcf.conf_info.n_pt; % number of active pairtypes
  
  [n1,dim] = size(x1);
  n2 = size(x2,1);
  N_mov = size(atomtype_mov,2);
  N_fro = size(atomtype_fro,2);
  ma2 = gpcf.magnSigma2;

  s2 = 1./gpcf.lengthScale.^2;
  % If a single common lengthscale is used, replicate it for every
  % pair type
  if length(s2)==1
      s2 = repmat(s2,1,n_pt);
  end
  
  if nargin < 4 || isempty(dims)
      dims = 1:dim;
  end

  DKff = {};
  ii1 = 0;
  if nargin == 5 && isequal(takeOnlyDiag,'takeOnlyDiag')
      % Diagonal-only mode: with identical configurations the distance
      % is zero, so only the mixed second derivative term remains.
      for i_dim = dims
          % Split the input dimension into (moving atom, x/y/z index)
          i = ceil(i_dim/3);
          xyz = i_dim-(i-1)*3;
          D12 = zeros(n1,1);
          if N_mov > 1
            for j = 1:N_mov
              if i ~= j
                r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                temp = -4*s2(pairtype(atomtype_mov(i),atomtype_mov(j)));
                temp_1 = (x1(:,(i-1)*3+xyz)-x1(:,(j-1)*3+xyz))./r_ij_1.^3;
                deriv_ij_12 = temp*temp_1.^2;
                D12 = D12 + deriv_ij_12;
              end
            end
          end
          if N_fro > 0
            for j = 1:N_fro
              r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
              temp = -4*s2(pairtype(atomtype_mov(i),atomtype_fro(j)));
              temp_1 = bsxfun(@minus,x1(:,(i-1)*3+xyz),conf_fro(j,xyz))./(r_ij_1.^3);
              deriv_ij_12 = temp*temp_1.^2;
              D12 = D12 + deriv_ij_12;
            end
          end
          DK = -3/2.*ma2.*D12;
          ii1 = ii1 + 1;
          DKff{ii1} = DK;
      end
  else
      
      %metric doesn't work with grad.obs on
      if isfield(gpcf,'metric')
          error('Metric doesnt work with grad.obs')
      else
          % Squared distance between the configurations
          dist=zeros(n1,n2);
          % distances between moving atoms
          if N_mov > 1
            for j = 1:N_mov-1
              for i = (j+1):N_mov
                invr_ij_1 = 1./sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                dist = dist + 2*s2(pairtype(atomtype_mov(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
              end
            end
          end
          % distances from moving atoms to frozen atoms
          if N_fro > 0
            for j = 1:N_mov
              for i = 1:N_fro
                invr_ij_1 = 1./sqrt(sum((bsxfun(@minus,x1(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                invr_ij_2 = 1./sqrt(sum((bsxfun(@minus,x2(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
                dist = dist + 2*s2(pairtype(atomtype_fro(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
              end
            end
          end
          dist = sqrt(dist);
          invdist = 1./dist;
          invdist(dist==0) = 0; % avoid Inf/NaN where the configurations coincide
          expdist = exp(-sqrt(3).*dist);
          
          for i_dim = dims
            % Split the input dimension into (moving atom, x/y/z index)
            i = ceil(i_dim/3);
            xyz = i_dim-(i-1)*3;
            % D1, D2: derivatives of the squared distance w.r.t.
            % x1(:,i_dim) and x2(:,i_dim); D12: mixed second derivative
            D1 = zeros(n1,n2);
            D2 = zeros(n1,n2);
            D12 = zeros(n1,n2);
            if N_mov > 1
              for j = 1:N_mov
                if i ~= j
                    r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                    r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                    temp = -4*s2(pairtype(atomtype_mov(i),atomtype_mov(j)));
                    temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
                    temp_1 = (x1(:,(i-1)*3+xyz)-x1(:,(j-1)*3+xyz))./r_ij_1.^3;
                    temp_2 = (x2(:,(i-1)*3+xyz)-x2(:,(j-1)*3+xyz))./r_ij_2.^3;
                    deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
                    deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
                    deriv_ij_12 = bsxfun(@times,temp*temp_1,temp_2');
                    D1 = D1 + deriv_ij_1;
                    D2 = D2 + deriv_ij_2;
                    D12 = D12 + deriv_ij_12;
                end
              end
            end
            if N_fro > 0
              for j = 1:N_fro
                r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
                r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
                temp = -4*s2(pairtype(atomtype_mov(i),atomtype_fro(j)));
                temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
                temp_1 = bsxfun(@minus,x1(:,(i-1)*3+xyz),conf_fro(j,xyz))./(r_ij_1.^3);
                temp_2 = bsxfun(@minus,x2(:,(i-1)*3+xyz),conf_fro(j,xyz))./(r_ij_2.^3);
                deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
                deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
                deriv_ij_12 = bsxfun(@times,temp*temp_1,temp_2');
                D1 = D1 + deriv_ij_1;
                D2 = D2 + deriv_ij_2;
                D12 = D12 + deriv_ij_12;
              end
            end
            % Chain rule through the squared distance: mixed-derivative
            % term plus product-of-first-derivatives term
            DK = -3/2.*ma2.*expdist.*D12;
            DK2 = 3/4*sqrt(3)*ma2*invdist.*expdist.*D1.*D2;
            ii1 = ii1 + 1;
            DKff{ii1} = DK + DK2;
          end
          
      end
  end
end

function DKff = gpcf_matern32at_ginput3(gpcf, x1, x2, dims1, dims2)
%GPCF_MATERN32AT_GINPUT3  Evaluate gradient of covariance function with
%                   respect to both input variables x1 and x2 (in
%                   different dimensions).
%
%  Description
%    DKff = GPCF_MATERN32AT_GINPUT3(GPCF, X1, X2, DIMS1, DIMS2) takes a
%    covariance function structure GPCF and matrices X1 (n1 x dim) and
%    X2 (n2 x dim) of input row vectors and returns DKff, a cell array
%    of n1 x n2 matrices d^2 k(X1,X2)/(dX1_d1 dX2_d2), one for every
%    combination of d1 in DIMS1 and d2 in DIMS2. An empty X2 is
%    replaced by X1; DIMS1 and DIMS2 default to 1:dim.
%
%    The cells are ordered with d1 in the outer loop and d2 in the
%    inner loop:
%      DKff{1} = d^2 k/(dX1_DIMS1(1) dX2_DIMS2(1))
%      DKff{2} = d^2 k/(dX1_DIMS1(1) dX2_DIMS2(2))
%       ...
%    This subfunction is needed when using derivative observations.
%
%    Note! When coding the derivatives of the covariance function, remember
%    to double check them. See gp_cov for lines of code to check the
%    matrices
%
%  See also
%    GPCF_MATERN32AT_GINPUT, GPCF_MATERN32AT_GINPUT2, GPCF_MATERN32AT_CFDG2

  % Validate the argument count before x2 is touched; previously this
  % check came after size(x2,1) and could never be reached.
  if nargin < 3
    error('Needs at least 3 input arguments')
  end
  if isempty(x2)
      x2 = x1;
  end

  conf_fro = gpcf.conf_info.conf_fro; % coordinates of active frozen atoms (N_fro x 3)
  atomtype_mov = gpcf.conf_info.atomtype_mov; % atomtype indices for moving atoms (1 x N_mov)
  atomtype_fro = gpcf.conf_info.atomtype_fro; % atomtype indices for active frozen atoms (1 x N_fro)
  pairtype = gpcf.conf_info.pairtype; % pairtype indices for pairs of atomtypes (n_at x n_at)
  n_pt = gpcf.conf_info.n_pt; % number of active pairtypes
  
  [n1,dim] = size(x1);
  n2 = size(x2,1);
  N_mov = size(atomtype_mov,2);
  N_fro = size(atomtype_fro,2);
  ma2 = gpcf.magnSigma2;

  s2 = 1./gpcf.lengthScale.^2;
  % If a single common lengthscale is used, replicate it for every
  % pair type
  if length(s2)==1
      s2 = repmat(s2,1,n_pt);
  end
  
  if nargin < 4 || isempty(dims1)
      dims1 = 1:dim;
  end
  if nargin < 5 || isempty(dims2)
      dims2 = 1:dim;
  end
  
  if isfield(gpcf,'metric')
      error('Metric doesnt work with ginput3')
  end

  DKff = {};
  ii1 = 0;

  % Squared distance between the configurations
  dist=zeros(n1,n2);
  % distances between moving atoms
  if N_mov > 1
      for j = 1:N_mov-1
          for i = (j+1):N_mov
              invr_ij_1 = 1./sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
              invr_ij_2 = 1./sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
              dist = dist + 2*s2(pairtype(atomtype_mov(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
          end
      end
  end
  % distances from moving atoms to frozen atoms
  if N_fro > 0
      for j = 1:N_mov
          for i = 1:N_fro
            invr_ij_1 = 1./sqrt(sum((bsxfun(@minus,x1(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
            invr_ij_2 = 1./sqrt(sum((bsxfun(@minus,x2(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
            dist = dist + 2*s2(pairtype(atomtype_fro(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
          end
      end
  end
  dist = sqrt(dist);
  invdist = 1./dist;
  invdist(dist==0) = 0; % avoid Inf/NaN where the configurations coincide
  expdist = exp(-sqrt(3).*dist);
      
  for i_dim1 = dims1
    % Split the first dimension into (moving atom, x/y/z index); this
    % does not depend on the inner loop variable
    i_1 = ceil(i_dim1/3);
    xyz_1 = i_dim1-(i_1-1)*3;
    for i_dim2 = dims2
      i_2 = ceil(i_dim2/3);
      xyz_2 = i_dim2-(i_2-1)*3;
      % D1, D2: derivatives of the squared distance w.r.t.
      % x1(:,i_dim1) and x2(:,i_dim2); D12: mixed second derivative
      D1 = zeros(n1,n2);
      D2 = zeros(n1,n2);
      D12 = zeros(n1,n2);
      if i_1 ~= i_2
          % --- the two dimensions belong to different moving atoms ---
          % derivative w.r.t. x1(:,i_dim1): all pairs involving atom i_1
          i = i_1;
          if N_mov > 1
            for j = 1:N_mov
              if i ~= j
                r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                temp = -4*s2(pairtype(atomtype_mov(i),atomtype_mov(j)));
                temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
                temp_1 = (x1(:,(i-1)*3+xyz_1)-x1(:,(j-1)*3+xyz_1))./r_ij_1.^3;
                deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
                D1 = D1 + deriv_ij_1;
              end
            end
          end
          if N_fro > 0
            for j = 1:N_fro
              r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
              r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
              temp = -4*s2(pairtype(atomtype_mov(i),atomtype_fro(j)));
              temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
              temp_1 = bsxfun(@minus,x1(:,(i-1)*3+xyz_1),conf_fro(j,xyz_1))./(r_ij_1.^3);
              deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
              D1 = D1 + deriv_ij_1;
            end
          end
          % derivative w.r.t. x2(:,i_dim2): all pairs involving atom i_2
          i = i_2;
          if N_mov > 1
            for j = 1:N_mov
              if i ~= j
                r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                temp = -4*s2(pairtype(atomtype_mov(i),atomtype_mov(j)));
                temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
                temp_2 = (x2(:,(i-1)*3+xyz_2)-x2(:,(j-1)*3+xyz_2))./r_ij_2.^3;
                deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
                D2 = D2 + deriv_ij_2;
              end
            end
          end
          if N_fro > 0
            for j = 1:N_fro
              r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
              r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
              temp = -4*s2(pairtype(atomtype_mov(i),atomtype_fro(j)));
              temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
              temp_2 = bsxfun(@minus,x2(:,(i-1)*3+xyz_2),conf_fro(j,xyz_2))./(r_ij_2.^3);
              deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
              D2 = D2 + deriv_ij_2;
            end
          end
          % mixed derivative: only the pair (i_1,i_2) depends on both
          % coordinates
          i = i_1;
          j = i_2;
          r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
          r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
          temp = -4*s2(pairtype(atomtype_mov(i),atomtype_mov(j)));
          temp_1 = (x1(:,(i-1)*3+xyz_1)-x1(:,(j-1)*3+xyz_1))./r_ij_1.^3;
          temp_2 = (x2(:,(j-1)*3+xyz_2)-x2(:,(i-1)*3+xyz_2))./r_ij_2.^3;
          deriv_ij_12 = bsxfun(@times,temp*temp_1,temp_2');
          D12 = D12 + deriv_ij_12;
      else
          % --- both dimensions belong to the same moving atom: every
          % pair involving that atom contributes to D1, D2 and D12 ---
          i = i_1;
          if N_mov > 1
            for j = 1:N_mov
              if i ~= j
                r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                temp = -4*s2(pairtype(atomtype_mov(i),atomtype_mov(j)));
                temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
                temp_1 = (x1(:,(i-1)*3+xyz_1)-x1(:,(j-1)*3+xyz_1))./r_ij_1.^3;
                temp_2 = (x2(:,(i-1)*3+xyz_2)-x2(:,(j-1)*3+xyz_2))./r_ij_2.^3;
                deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
                deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
                deriv_ij_12 = bsxfun(@times,temp*temp_1,temp_2');
                D1 = D1 + deriv_ij_1;
                D2 = D2 + deriv_ij_2;
                D12 = D12 + deriv_ij_12;
              end
            end
          end
          if N_fro > 0
            for j = 1:N_fro
              r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
              r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
              temp = -4*s2(pairtype(atomtype_mov(i),atomtype_fro(j)));
              temp_0 = bsxfun(@minus,1./r_ij_1,1./r_ij_2');
              temp_1 = bsxfun(@minus,x1(:,(i-1)*3+xyz_1),conf_fro(j,xyz_1))./(r_ij_1.^3);
              temp_2 = bsxfun(@minus,x2(:,(i-1)*3+xyz_2),conf_fro(j,xyz_2))./(r_ij_2.^3);
              deriv_ij_1 = bsxfun(@times,temp*temp_0,temp_1);
              deriv_ij_2 = bsxfun(@times,-temp*temp_0,temp_2');
              deriv_ij_12 = bsxfun(@times,temp*temp_1,temp_2');
              D1 = D1 + deriv_ij_1;
              D2 = D2 + deriv_ij_2;
              D12 = D12 + deriv_ij_12;
            end
          end
      end
      % Chain rule through the squared distance: mixed-derivative term
      % plus product-of-first-derivatives term
      DK = -3/2.*ma2.*expdist.*D12;
      DK2 = 3/4*sqrt(3)*ma2*invdist.*expdist.*D1.*D2;
      ii1 = ii1 + 1;
      DKff{ii1} = DK + DK2;
      
    end
  end
end

function DKff = gpcf_matern32at_ginput4(gpcf, x1, x2, dims)
%GPCF_MATERN32AT_GINPUT4  Evaluate gradient of covariance function with
%                         respect to x. Simplified and faster version of
%                         gpcf_matern32at_ginput, returns full matrices.
%
%  Description
%    DKff = GPCF_MATERN32AT_GINPUT4(GPCF, X) takes a covariance function
%    structure GPCF, a matrix X of input vectors and returns
%    DKff, the gradients of covariance matrix Kff = k(X,X) with
%    respect to X (whole matrix). This subfunction is needed when
%    using derivative observations.
%
%    DKff = GPCF_MATERN32AT_GINPUT4(GPCF, X, X2) takes a covariance
%    function structure GPCF, a matrix X of input vectors and
%    returns DKff, the gradients of covariance matrix Kff =
%    k(X,X2) with respect to X (whole matrix). An empty X2 is
%    treated as X2 = X.
%
%    DKff = GPCF_MATERN32AT_GINPUT4(GPCF, X, X2, DIMS) returns DKff, the
%    gradients of covariance matrix Kff = k(X,X2) with respect to
%    dimensions DIMS of X. DIMS may be a row or a column vector of
%    column indices of X; an empty DIMS gives an empty (1 x 0) cell.
%
%    DKff is a 1 x numel(DIMS) cell array; DKff{k} is the
%    size(X,1) x size(X2,1) matrix of derivatives with respect to
%    input dimension DIMS(k) of X.
%
%    The fields read from GPCF are magnSigma2, lengthScale and
%    conf_info (conf_fro, atomtype_mov, atomtype_fro, pairtype, n_pt).
%    An error is raised if GPCF has a field 'metric'.
%
%    Note! When coding the derivatives of the covariance function, remember
%    to double check them. See gp_cov for lines of code to check the
%    matrices
%
%  See also
%    GPCF_MATERN32_PAK, GPCF_MATERN32_UNPAK, GPCF_MATERN32_LP, GP_G

  conf_fro = gpcf.conf_info.conf_fro; % coordinates of active frozen atoms (N_fro x 3)
  atomtype_mov = gpcf.conf_info.atomtype_mov; % atomtype indices for moving atoms (1 x N_mov)
  atomtype_fro = gpcf.conf_info.atomtype_fro; % atomtype indices for active frozen atoms (1 x N_fro)
  pairtype = gpcf.conf_info.pairtype; % pairtype indices for pairs of atomtypes (n_at x n_at)
  n_pt = gpcf.conf_info.n_pt; % number of active pairtypes

  if nargin == 2 || isempty(x2)
      x2 = x1;
  end

  [n1,dim] = size(x1);
  n2 = size(x2,1);
  N_mov = size(atomtype_mov,2);
  N_fro = size(atomtype_fro,2);
  ma2 = gpcf.magnSigma2;

  s2 = 1./gpcf.lengthScale.^2;
  % If ARD is not used make s2 a vector of
  % equal elements (one per active pairtype)
  if length(s2)==1
      s2 = repmat(s2,1,n_pt);
  end

  if nargin<4
    dims=1:dim;
  end
  % A FOR loop iterates over the columns of its index array, so a column
  % vector of dimensions would be processed as one (vector-valued) index.
  % Force a row vector so that every requested dimension gets its own pass.
  dims = dims(:).';

  % Preallocate the output; this also guarantees that DKff is defined
  % (as an empty cell) when no dimensions are requested.
  DKff = cell(1,numel(dims));

  if isfield(gpcf,'metric')
    error('no metric implemented')
  end

  % Squared distance between configurations:
  % sum over atom pairs of 2*s2_pairtype*(1/r_ij - 1/r_ij')^2
  dist=zeros(n1,n2);
  % distances between moving atoms
  if N_mov > 1
      for j = 1:N_mov-1
          for i = (j+1):N_mov
              invr_ij_1 = 1./sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
              invr_ij_2 = 1./sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
              dist = dist + 2*s2(pairtype(atomtype_mov(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
          end
      end
  end
  % distances from moving atoms to frozen atoms
  if N_fro > 0
      for j = 1:N_mov
          for i = 1:N_fro
              invr_ij_1 = 1./sqrt(sum((bsxfun(@minus,x1(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
              invr_ij_2 = 1./sqrt(sum((bsxfun(@minus,x2(:,(j*3-2):(j*3)),conf_fro(i,1:3))).^2,2));
              dist = dist + 2*s2(pairtype(atomtype_fro(i),atomtype_mov(j))).*(bsxfun(@minus,invr_ij_1,invr_ij_2')).^2;
          end
      end
  end

  % With k = ma2*(1+sqrt(3*dist))*exp(-sqrt(3*dist)), the chain rule gives
  % dk/dx = -3/2*ma2*exp(-sqrt(3*dist)) * d(dist)/dx. The factor in front
  % of d(dist)/dx does not depend on the dimension, so evaluate it once.
  prefactor = -3/2.*ma2.*exp(-sqrt(3.*dist));

  for k = 1:numel(dims)
      i_dim = dims(k);
      i = ceil(i_dim/3);       % index of the moving atom owning this coordinate
      xyz = i_dim-(i-1)*3;     % 1, 2 or 3: which Cartesian component of atom i
      % D1 accumulates d(dist)/d(x1(:,i_dim)) over all pairs involving atom i
      D1 = zeros(n1,n2);
      if N_mov > 1
          for j = 1:N_mov
              if i ~= j
                  r_ij_1 = sqrt(sum((x1(:,(j*3-2):(j*3))-x1(:,(i*3-2):(i*3))).^2,2));
                  r_ij_2 = sqrt(sum((x2(:,(j*3-2):(j*3))-x2(:,(i*3-2):(i*3))).^2,2));
                  deriv_ij = -4*s2(pairtype(atomtype_mov(i),atomtype_mov(j))).*bsxfun(@minus,1./r_ij_1,1./r_ij_2');
                  deriv_ij = bsxfun(@times,deriv_ij,(x1(:,(i-1)*3+xyz)-x1(:,(j-1)*3+xyz))./r_ij_1.^3);
                  D1 = D1 + deriv_ij;
              end
          end
      end
      if N_fro > 0
          for j = 1:N_fro
              r_ij_1 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x1(:,(i*3-2):(i*3)))).^2,2));
              r_ij_2 = sqrt(sum((bsxfun(@minus,conf_fro(j,1:3),x2(:,(i*3-2):(i*3)))).^2,2));
              deriv_ij = -4*s2(pairtype(atomtype_mov(i),atomtype_fro(j))).*bsxfun(@minus,1./r_ij_1,1./r_ij_2');
              deriv_ij = bsxfun(@times,deriv_ij,(bsxfun(@minus,x1(:,(i-1)*3+xyz),conf_fro(j,xyz)))./r_ij_1.^3);
              D1 = D1 + deriv_ij;
          end
      end
      DKff{k} = prefactor.*D1;
  end
end

function reccf = gpcf_matern32at_recappend(reccf, ri, gpcf)
%RECAPPEND  Record append
%
%  Description
%    RECCF = GPCF_MATERN32AT_RECAPPEND(RECCF, RI, GPCF) takes a
%    covariance function record structure RECCF, record index RI
%    and covariance function structure GPCF with the current
%    parameter values, and stores those values on row RI of RECCF.
%    This subfunction is needed when using MCMC sampling (gp_mc).
%
%    RECCF = GPCF_MATERN32AT_RECAPPEND(RECCF, RI) (two arguments)
%    initializes the record instead: the type string, empty
%    parameter storage and the function handles are set, and the
%    fields p.lengthScale, p.magnSigma2 and selectedVariables are
%    copied from the structure given as RI when present there.
%
%  See also
%    GP_MC and GP_MC -> RECAPPEND

  if nargin == 2
    % ---- Initialization: the second argument is used as a structure ----
    reccf.type = 'gpcf_matern32at';

    % Empty storage for the recorded parameter values
    reccf.lengthScale= [];
    reccf.magnSigma2 = [];

    % Function handles, assigned field by field in a fixed order
    handleTable = { ...
        'pak',       @gpcf_matern32at_pak; ...
        'unpak',     @gpcf_matern32at_unpak; ...
        'lp',        @gpcf_matern32at_lp; ...
        'lpg',       @gpcf_matern32at_lpg; ...
        'cfg',       @gpcf_matern32at_cfg; ...
        'cfdg',      @gpcf_matern32at_cfdg; ...
        'cfdg2',     @gpcf_matern32at_cfdg2; ...
        'ginput',    @gpcf_matern32at_ginput; ...
        'ginput2',   @gpcf_matern32at_ginput2; ...
        'ginput3',   @gpcf_matern32at_ginput3; ...
        'ginput4',   @gpcf_matern32at_ginput4; ...
        'cov',       @gpcf_matern32at_cov; ...
        'trcov',     @gpcf_matern32at_trcov; ...
        'trvar',     @gpcf_matern32at_trvar; ...
        'recappend', @gpcf_matern32at_recappend};
    for k = 1:size(handleTable,1)
      reccf.fh.(handleTable{k,1}) = handleTable{k,2};
    end

    % Prior slots start empty and are filled from ri.p when available
    reccf.p = struct('lengthScale',[],'magnSigma2',[]);
    priorNames = {'lengthScale','magnSigma2'};
    for k = 1:numel(priorNames)
      nm = priorNames{k};
      if isfield(ri.p,nm) && ~isempty(ri.p.(nm))
        reccf.p.(nm) = ri.p.(nm);
      end
    end

    if isfield(ri, 'selectedVariables')
        reccf.selectedVariables = ri.selectedVariables;
    end
    return
  end

  % ---- Append: store the current values of gpcf on row ri ----
  priors = gpcf.p;

  % The length scale is recorded only when gpcf has no 'metric' field
  if ~isfield(gpcf,'metric')
    reccf.lengthScale(ri,:)=gpcf.lengthScale;
    if isfield(priors,'lengthScale') && ~isempty(priors.lengthScale)
      appendPrior = priors.lengthScale.fh.recappend;
      reccf.p.lengthScale = appendPrior(reccf.p.lengthScale, ri, priors.lengthScale);
    end
  end

  % The magnitude is always recorded
  reccf.magnSigma2(ri,:)=gpcf.magnSigma2;
  if isfield(priors,'magnSigma2') && ~isempty(priors.magnSigma2)
    appendPrior = priors.magnSigma2.fh.recappend;
    reccf.p.magnSigma2 = appendPrior(reccf.p.magnSigma2, ri, priors.magnSigma2);
  end
end

function [F,L,Qc,H,Pinf,dF,dQc,dPinf,params] = gpcf_matern32at_cf2ss(gpcf,x)
%GPCF_MATERN32AT_CF2SS Convert the covariance function to state space form
%
%  Description
%    Convert the covariance function to state space form such that
%    the process can be described by the stochastic differential equation
%    of the form:
%      df(t)/dt = F f(t) + L w(t),
%    where w(t) is a white noise process. The observation model now
%    corresponds to y_k = H f(t_k) + r_k, where r_k ~ N(0,sigma2).
%
%    The model matrices are obtained from CF_MATERN32_TO_SS, called with
%    GPCF.magnSigma2 and GPCF.lengthScale. Of the derivative arrays dF,
%    dQc and dPinf only those slices along the third dimension are
%    returned whose parameter has a non-empty prior in GPCF.p (slice 1
%    is taken to belong to magnSigma2 and slice 2 to lengthScale).
%    The argument X is optional and is not used in the computation.
%
%  References:
%    Simo Sarkka, Arno Solin, Jouni Hartikainen (2013).
%    Spatiotemporal learning via infinite-dimensional Bayesian
%    filtering and smoothing. IEEE Signal Processing Magazine,
%    30(4):51-61.
%

  % Default for the optional (unused) input
  if nargin < 2
    x = []; %#ok<NASGU>
  end

  % Model matrices, their derivatives and parameter information
  sigma2 = gpcf.magnSigma2;
  ell = gpcf.lengthScale;
  [F,L,Qc,H,Pinf,dF,dQc,dPinf,params] = cf_matern32_to_ss(sigma2, ell);

  % A parameter counts as optimized when its prior is non-empty
  keep = [~isempty(gpcf.p.magnSigma2), ~isempty(gpcf.p.lengthScale)];

  % Drop the derivative slices of parameters that are not optimized
  dF    = dF(:,:,keep);
  dQc   = dQc(:,:,keep);
  dPinf = dPinf(:,:,keep);

end
