%% MAUNA LOA CO2 MODELING EXAMPLE
%
%  Description:
%
%    In this demo we apply the state space inference methods to the 
%    well-known time series data consisting of atmospheric CO2 
%    concentration readings in parts per million (ppm) by volume from 
%    air samples collected at the Mauna Loa observatory, Hawaii (see
%    [1] for details and further references).
%
%    The benefit from the state space formulation is that the 
%    computational complexity is linear with respect to the number
%    of data points. Due to efficient matrix solvers in Matlab 
%    (favoring the traditional GP solution over sequential looping),
%    the advantages in speed only start to show in datasets with 
%    thousands of data points.
%
%  The methods in this demo are based on the paper:
%
%    [1] Arno Solin and Simo Sarkka (2014). Explicit link between periodic 
%        covariance functions and state space models. Accepted for 
%        publication in Proceedings of the Seventeenth International 
%        Conference on Artificial Intelligence and Statistics (AISTATS 2014).
%
% Copyright:
%
%    2013-2014 Arno Solin and Simo Sarkka
%
% This software is distributed under the GNU General Public
% License (version 3 or later); please refer to the file
% License.txt, included with the software, for details.
%

%% Add path to the core functions

  addpath ../core/


%% Load data

  % The raw NOAA text files are shipped exactly as provided (see the data
  % text files themselves for more information), so all pre-processing is
  % done here in the script instead of in a separate conversion step.

  % Read both files as plain numeric matrices; with the 'shell' comment
  % style, textread skips everything following a '#' on a line.
  monthly = textread('co2_mm_mlo.txt', '', 'commentstyle', 'shell');
  weekly  = textread('co2_weekly_mlo.txt', '', 'commentstyle', 'shell');

  % Any entry below -99 is treated as a missing-value marker and replaced
  % by NaN so that it can be filtered out later.
  monthly(monthly < -99) = NaN;
  weekly(weekly < -99)   = NaN;

  
%% Combine monthly and weekly data

  % Columns as indexed by this script: the time stamp is column 3 of the
  % monthly table and column 4 of the weekly table; the reading is
  % column 4 (monthly) and column 5 (weekly).
  tMonthly = monthly(:,3);
  yMonthly = monthly(:,4);
  tWeekly  = weekly(:,4);
  yWeekly  = weekly(:,5);

  % Date of the first weekly value
  t0 = min(tWeekly);

  % Monthly values are only used for dates strictly before t0; from t0
  % onwards the weekly series takes over
  ind = (tMonthly < t0);
  t = [tMonthly(ind); tWeekly];
  y = [yMonthly(ind); yWeekly];

  % Drop the missing values (NaNs)
  ind = ~isnan(y);
  t   = t(ind);
  y   = y(ind);

  % Training set: data prior to 2010 (newer data is retained in t and y
  % for validation). The training targets are centred on their mean.
  ind   = t<2010;
  to    = t(ind);
  yn    = y(ind);
  ymean = mean(yn);
  yn    = yn-ymean;
  
  
%% Show

  figure(1); clf
    % Training observations on the original scale (mean added back)
    plot(to, yn+ymean, 'xk', 'MarkerSize', 3)
    xlabel('Time (year)')
    ylabel('Observed CO_2 concentration (ppm)')
    axis tight

    
%% Model setup

  clear model

  % Training inputs and mean-removed training targets
  model.x = to;
  model.y = yn;

  % Gaussian likelihood model for GP regression: initial noise variance.
  % NOTE(review): model.opt = true presumably tells ss_optimize to tune
  % sigma2 as well -- confirm in ss_optimize.
  model.sigma2 = 1;
  model.opt    = true;

  % Each entry of model.ss describes one additive covariance component:
  % 'make_ss' is the function handle that builds it, 'opt' lists the
  % names of the fields that are to be optimized, and the remaining
  % fields are initial hyperparameter values / component settings.

  % Slow trend:
  % A squared exponential covariance function to deal with the smooth
  % long-term effects
  model.ss{1} = struct( ...
    'make_ss',      @cf_se_to_ss, ...
    'lengthScale',  100, ...
    'magnSigma2',   1e4, ...
    'N',            6, ...
    'opt',          {{'magnSigma2','lengthScale'}});

  % Faster non-periodic variations:
  % A Matern covariance function with nu = 3/2 (cf_matern32_to_ss) deals
  % with short-term non-periodic effects that remain otherwise unexplained
  model.ss{2} = struct( ...
    'make_ss',      @cf_matern32_to_ss, ...
    'lengthScale',  1, ...
    'magnSigma2',   0.5, ...
    'opt',          {{'magnSigma2','lengthScale'}});

  % Yearly oscillation:
  % A quasi-periodic covariance function deals with the periodic
  % variation in the data. Here the quasi-periodic covariance function
  % is a product of a periodic covariance function and a second,
  % slowly decaying covariance function (its settings are the fields
  % nu, mN and mlengthScale).
  model.ss{3} = struct( ...
    'make_ss',      @cf_quasiperiodic_to_ss, ...
    'lengthScale',  1, ...
    'magnSigma2',   5, ...
    'period',       1, ...   % one year
    'N',            6, ...
    'nu',           3/2, ...
    'mN',           6, ...
    'mlengthScale', 140, ...
    'opt',          {{'magnSigma2','lengthScale','mlengthScale'}});
  
  
%% Optimize parameters
  
  % Optimizer options, set in a single call. ('Display' used to be set
  % twice with the same value; the duplicate has been removed.)
  %   GradObj         - the objective supplies its own gradient
  %   TolX / TolFun   - termination tolerances
  %   LargeScale      - 'off' selects the medium-scale algorithm
  %   Display         - print progress at every iteration
  %   DerivativeCheck - no finite-difference check of the gradient
  options = optimset( ...
    'GradObj',         'on', ...
    'TolX',            1e-3, ...
    'TolFun',          1e-3, ...
    'LargeScale',      'off', ...
    'Display',         'iter', ...
    'DerivativeCheck', 'off');
  
  % Find hyperparameters:
  % Here we use 'fminunc' from the optimization toolbox. You could 
  % also use custom routines (such as some bfgs implementation).
  % The model returned by ss_optimize replaces the initial one; tic/toc
  % reports the wall-clock time spent in the optimization.
  tic
  [model,lik] = ss_optimize(model, ...
    'optimizer',@fminunc, ...
    'options',options);
  toc
  
  % Show the value returned by the optimization
  % NOTE(review): confirm in ss_optimize whether lik is the marginal
  % likelihood itself or its (negative) logarithm.
  fprintf('Model marginal likelihood: %.2f \n',lik)
  
  
%% ... or load the pre-calculated results
%
%  load model.mat
%  
%  
%% Predict using state space
  
  % Test inputs: an even grid over the years 2010-2020 with a spacing
  % of 1/24 year
  dt = 1/24;
  xt = 2010:dt:2020;
  
  % Evaluate the state space model at the test inputs; the two outputs
  % are used below as the predictive mean and variance
  [meanf,Varf] = ss_predict(model,'xt',xt);
  
  

%% The full GP solution for comparison
  
  % Reference solution: ordinary GP regression with explicitly formed
  % covariance matrices. It is included only to validate the state
  % space result.

  % --- Quasi-periodic component (hyperparameters from model.ss{3}) ---
  % Periodic kernel multiplied by a Matern kernel of smoothness nuQP
  nuQP     = model.ss{3}.nu;
  s2QP     = model.ss{3}.magnSigma2;
  ellQP    = model.ss{3}.lengthScale;
  ellDecay = model.ss{3}.mlengthScale;
  perQP    = model.ss{3}.period;
  kp = @(t) s2QP.* ...
    exp(-2*sin(2*pi/perQP*t/2).^2/ellQP^2).* ...
    1/gamma(nuQP).*2^(1-nuQP).*(sqrt(2*nuQP)*t/ellDecay).^nuQP.* ...
    besselk(nuQP,sqrt(2*nuQP)*t/ellDecay);

  % --- Matern component, nu = 3/2 (hyperparameters from model.ss{2}) ---
  nuM  = 3/2;
  s2M  = model.ss{2}.magnSigma2;
  ellM = model.ss{2}.lengthScale;
  k32 = @(t) s2M.* ...
    1/gamma(nuM)*2^(1-nuM)*(sqrt(2*nuM)*t/ellM).^nuM.* ...
    besselk(nuM,sqrt(2*nuM)*t/ellM);

  % --- Squared exponential component, i.e. Matern with nu = inf
  %     (hyperparameters from model.ss{1}) ---
  s2SE  = model.ss{1}.magnSigma2;
  ellSE = model.ss{1}.lengthScale;
  kse = @(t) s2SE.* ...
    exp(-t.^2/2/ellSE.^2);

  % Pairwise absolute input distances:
  %   Doo: training vs. training, Dto: test vs. training (one row per
  %   test point), Dtt: test vs. test
  xo  = model.x(:);
  xs  = xt(:);
  Doo = abs(bsxfun(@minus, xo, xo.'));
  Dto = abs(bsxfun(@minus, xs, xo.'));
  Dtt = abs(bsxfun(@minus, xs, xs.'));

  % Accumulate the covariance matrices as the sum over the components.
  % The order of covfun matches the order of model.ss.
  Coo = zeros(size(Doo));
  Cto = zeros(size(Dto));
  Ctt = zeros(size(Dtt));
  covfun = {kse,k32,kp};
  for j=1:numel(covfun)
    kj  = covfun{j};
    s2j = model.ss{j}.magnSigma2;

    % The Bessel-based kernels evaluate to NaN at zero distance
    % (0 * Inf); the value there is the magnitude parameter.
    Kj = kj(Doo); Kj(isnan(Kj)) = s2j; Coo = Coo+Kj;
    Kj = kj(Dto); Kj(isnan(Kj)) = s2j; Cto = Cto+Kj;
    Kj = kj(Dtt); Kj(isnan(Kj)) = s2j; Ctt = Ctt+Kj;
  end

  % Solve GP (naive way), kept for reference:
  %mu = Cto/(Coo + model.sigma2*eye(numel(model.x)))*model.y(:);
  %V  = Ctt - Cto/(Coo + model.sigma2*eye(numel(model.x)))*Cto';
  %V = diag(V);

  % Solve GP (using Cholesky): factorize the noisy training covariance
  % once and reuse the factor for both the mean and the variance
  Cnoisy = Coo + model.sigma2*eye(numel(model.x));
  L  = chol(Cnoisy,'lower');
  mu = Cto*(L'\(L\model.y(:)));
  v  = L\Cto';
  V  = diag(Ctt) - sum(v'.*v',2);   % marginal variances only

  % This is the estimate from the naive full GP solution
  GP_Eft   = mu;
  GP_Varft = V;
    
  
%% Visualize the GP regression results
  
  % Show result: state space prediction (grey band), all observations
  % (including the validation data from 2010 onwards) and the bounds of
  % the full GP solution on top of each other
  figure(1); clf; hold on
  
    % Show the grey patch: mean +/- 1.96 standard deviations of the
    % state space prediction, shifted back by the training mean.
    % The outputs of ss_predict are forced into row orientation so that
    % the polygon is well-formed whether they come back as rows or
    % columns (xt is a row vector).
    ssMean = meanf(:).';
    ssBand = 1.96*sqrt(Varf(:).');
    p=patch([xt fliplr(xt)], ...
            ymean+[ssMean + ssBand fliplr(ssMean - ssBand)],1);
    set(p,'EdgeColor','none','FaceColor',[.8 .8 .8])
    
    % Show the observations
    p2=plot(t,y,'xk','MarkerSize',3,'LineWidth',0.1);

    % Show the full GP solution (upper and lower bound: p3 holds two
    % line handles)
    p3=plot(xt,ymean+GP_Eft+1.96*sqrt(GP_Varft),'-k', ...
         xt,ymean+GP_Eft-1.96*sqrt(GP_Varft),'-k', ...
         'LineWidth',.1);
    
    % Labels
    xlabel('Time (years)');
    ylabel('CO_2 concentration (ppm)')
    axis tight, grid on
    box on
    
    % Ticks
    set(gca,'XTick',1960:5:2020, ...
            'YTick',300:10:450, ...
            'YTickLabel',300:10:450)
    
    % Zoom in to the years 2000-2020
    xlim([2000 2020])
    ylim auto  
  
    % Figure window options
    set(gcf,'Color','w')

    % Show legend. Exactly one handle per label: only the first of the
    % two full GP bound lines is needed to represent them.
    legend([p2;p;p3(1)], ...
        'Observations','State space result','Full GP result', ...
        'Location','NW')
    
