% Multiple Linear Regression Analysis using regress
% Lesson #8, Intro to Matlab programming SU 2013
% Author: Andres Patrignani
% Date: 6/24/2013
%% Read file with
% Load cornBiomass.xlsx from http://soilphysics.okstate.edu/teaching/soil-5110
[num,txt] = xlsread ('cornBiomass.xlsx',1);
%% Extract Variables
height = num(:,1);
stemD = num(:,2);
dryBiomass = num(:,3);
N = length(dryBiomass);
%% Multiple regression
% Set hypotetical model with terms that may describe the data
X = [ones(size(height)) height stemD height.*stemD]; %height.*stemD.^2
% Note: When computing statistics, X should includea column of 1s so that
% the model contains a constant term. The F statisticand its p value are computed
% under this assumption,and they are not correct for models without a constant.
%% Run regress to calculate stats and determine which terms are not significant
[b,bint,R,Rint,stats] = regress(dryBiomass,X); % Removes NaN data.
% b-->Coefficientestimates for a multilinear regression of the responses in y
% onthe predictors in X
% bint-->The first columnof bint contains lower confidence bounds for eachof the p
% coefficient estimates; the second columncontains upper confidence bounds.
% If it contains zero is not significant, and can be discarded.
% r-->residuals
% rint-->intervalsthat can be used to diagnose outliers. If the interval rint(i,:)
% forobservation i does not contain zero, the correspondingresidual is larger than
% expected in 95% of new observations, suggestingan outlier.
% stat-->returnsa 1-by-4 vector stats that contains, in order,the R2 statistic,
% the F statistic and its p value,and an estimate of the error variance.
%% Plot surface to see the plane of the full model.
subplot(1,2,1),scatter3(height,stemD,dryBiomass,'filled')
title ('Full Model','FontSize',14,'FontWeight','bold')
hold on
Heightfit = min(height):1:max(height); % Get max and min of height to construct the grid.
stemDfit = min(stemD):0.1:max(stemD); % Get max and min of stemD to construct the grid.
[HEIGHTFIT,STEMDFIT] = meshgrid(Heightfit,stemDfit); % Creates the grid.
YFIT = b(1) + b(2)*HEIGHTFIT + b(3)*STEMDFIT + b(4).*HEIGHTFIT.*STEMDFIT ; % Generate .
% values in the third dimension by using the full model and the
% arbitrary Heightfit and stemfit values.
mesh(HEIGHTFIT,STEMDFIT,YFIT)
view(50,10)
xlabel('Height(cm)','FontSize',12,'FontWeight','bold');
ylabel('StemD (cm)','FontSize',12,'FontWeight','bold')
zlabel('Dry Biomass (g)','FontSize',12,'FontWeight','bold')
grid off
%% Second run with restricted terms and F-Test
% Since zero was present in the 95% confidence interval of the coefficient
% in the height term, this term was removed to test the performance of a simpler
% model with less terms.
Xrestricted = X(:,[1 3 4]);
[bres, bintres, Rres, Rintres, Statsres] = regress(dryBiomass,Xrestricted);
%% Plot restricted model
subplot(1,2,2), scatter3(height,stemD,dryBiomass,'filled') % Create subplot 2.
title('Restricted Model','FontSize',14,'FontWeight','bold')
hold on
Heightfit = min(height):1:max(height); % Get max and min of height to construct the grid
stemDfit = min(stemD):0.1:max(stemD); % Get max and min of stemD to construct the grid
[HEIGHTFIT,STEMDFIT] = meshgrid(Heightfit,stemDfit); % Creates the grid.
YFIT2 = bres(1) + bres(2)*STEMDFIT + bres(3).*HEIGHTFIT.*STEMDFIT; % Generate .
% values in the third dimension by using the restricted model and the
% arbitrary Heightfit and stemfit values.
mesh(HEIGHTFIT,STEMDFIT,YFIT2) % Create mesh. Surf could also be used.
view(50,10) % Set angle of view when the chart shows up.
xlabel('Height(cm)','FontSize',12,'FontWeight','bold');
ylabel('StemD (cm)','FontSize',12,'FontWeight','bold')
zlabel('Dry Biomass (g)','FontSize',12,'FontWeight','bold')
grid off
%% F-test (may need to be checked for errors
%SSE = nansum(R.^2);
%SSER = nansum(Rres.^2);
%q = 1; % restricted terms
%k = 4; % degrees of freedom.
%F = ((SSER-SSE)/q) / (SSE/(N-k));
% Then look in F table with error and DF to accept or reject. Accepting the
% null hypothesys means that the terms removed from the full model were
% actually not important, and so the restricted model can be used as a
% simpler model.