load_eegdata;
subject = 1
tasks = [1 2 3 4 5]
%tasks = [2 3]
trials = 1:5
trials = squeeze(eegdata(subject,tasks,trials));
% Remove EOG channel
[nr,nc] = size(trials);
for r = 1:nr
for c = 1:nc
trials{r,c} = trials{r,c}(1:6,:);
end
end
classes = repmat((1:length(tasks))',1,length(trials));
function featuresOrCV = makeRawEEGFeatures(trials,CV,command) %%% featuresOrCV = makeRawEEGFeatures(trials,CV,command) %%% trials: ntasks x ntrials cell array of data %%% CV.parms contains %%% .nlags %%% featuresOrCV: ntasks x ntrials cell array of featurized data %%% or set to CV if command='make' if nargin > 2 && strcmp(command,'make') featuresOrCV = CV; return; end nlags = CV.parms.nlags; featuresOrCV = lagize(trials,nlags);This function requires
function lagged = lagize(data,nlags,delta)
% lagged = lagize(datacellofwindows,nlags,delta)
if nargin < 3
delta = 1;
end
nlags = nlags + 1;
[r,c] = size(data);
lagged = cell(r,c);
for ri = 1:r
for ci = 1:c
datacols = data{ri,ci};
ncols = size(datacols,2);
starts = [0:nlags-1]*delta+1;
laggedWindow = [];
for i = starts
laggedWindow = [laggedWindow; datacols(:,i:(ncols-starts(end)+i))];
end
lagged{ri,ci} = laggedWindow;
end
end
Test it by
>> CV.parms.nlags = 1;
>> features = makeRawEEGFeatures(trials,CV);
>> size(features)
ans =
5 5
>> size(features{1,1})
ans =
12 2499
We have a 5 x 5 cell array of matrices of features, where each matrix
is 12 x 2499, because we have voltages from 6 channels augmented by
the same voltages delayed by one sample. Starting with 2500 samples
(10 seconds of EEG sampled 250 times per second), we get 2499 total
samples.
function [X,Y] = selectSamples(oneTrial,indices,class,CV) X = oneTrial(:,indices); Y = repmat(class,1,length(indices));
First, let's discuss the parameters. For the raw EEG representation, the only paramter is the number of lags. The parameter sets are specified as a vector of structs, like this.
lagsraw = 0:16; clear parameterSets; k = 0; for nl = lagsraw k = k + 1; parameterSets(k).nlags = nl; endCross-validation is performed by choosing one trial to be the test trial. The remaining trials form the training data. We must also specify the fraction of samples from each trial that are to be randomly selected from the training trials for the validation set, and the number of times we repeat this random selection. Let's call the training data remaining after removing the validation data the tuning data, used to tune the classifier. For each repetition, a classifier is formed on the tuning data, applied to the validation data, and the resulting RMS error is averaged over these representations to evaluate each parameter set. The parameter set that is determined to give the lowest average validation data error is saved as the best parameters in CV.parms.
Then, a classifier is constructed on all the training data using these best parameters and applied to the test trial. This error is saved. Then the whole procedure is repeated after each trial is selected as the test trial. With five trials, this procedure is repeated five times.
Anyway, here is an example classifier function that implements linear discriminant analysis (LDA). It should work for any representation you come up with, so for now you can choose to not read through this code and skip to the next section.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Make a Linear Discriminant Classifier
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [fractionCorrect,classifierOrClasses] = LDA(data,CV,selectInOneTrialF,command)
%%% [fractionCorrect,classifierOrClasses] = LDA(train,CV)
%%% train.features: ntasks x ntrials cell array of featurized data
%%% .classes: ntasks x ntrials matrix of class labels
%%% CV contains for collectTrainValidation()
%%% .parms.firstMode
%%% .parms.nModes
%%% .validateIndices: ntasks x ntrials cell array of indices
%%% .classes: ntasks x ntrials cell array of class labels (ints)
%%% command: Optional. If present, must be 'make' and classifier is created.
%%% Returns
%%% classifier.weights
%%% .bias
%%% validateCorrect: fraction of validation samples correct
%%% X: training data
if nargin > 3 && strcmp(command,'make')
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% make the classifier
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if ~isfield(CV.parms,'firstMode')
CV.parms.firstMode = 1; %so this works if data.features are not cell array of
CV.parms.nModes = 1; %windows, but just nchannels x nsamples matrix
end
[X,Y,Xval,Yval] = collectPartitions(data,CV,selectInOneTrialF,'train');
[p,N] = size(X);
K = size(data.features,1);
%% Calculate class priors, means and sum of covariance matrices.
priors = zeros(1,K);
means = zeros(K,p);
covsum = zeros(p,p);
classNames = unique(Y);
for i = 1:length(classNames)
mask = Y == classNames(i);
Nthisclass = sum(mask);
priors(i) = Nthisclass / N;
Xmat = X(:,mask);
means(i,:) = mean(Xmat');
XmeanZero = Xmat - repmat(means(i,:)',1,Nthisclass);
covsum = covsum + XmeanZero * XmeanZero';
end
covariance = covsum / (N-K);
invcov = inv(covariance);
%% Calculate weights and biases for each class
weights = zeros(p,K);
bias = zeros(K,1);
for k = 1:K
weights(:,k) = invcov * means(k,:)';
bias(k) = - 0.5 * means(k,:) * weights(:,k) + log(priors(k));
end
classifier.weights = weights;
classifier.bias = bias;
if ~isempty(CV.validateIndices)
disc = classifier.weights' * Xval + repmat(classifier.bias,1,size(Xval,2));
[junk,whichmax] = max(disc);
classes = classNames(whichmax);
validateCorrect = sum(classes == Yval) / length(Yval);
else
validateCorrect = [];
end
classifierOrClasses = classifier;
fractionCorrect = validateCorrect;
elseif nargin > 3 && ~strcmp(command,'make')
error('Command to LDA must be ''make'' or absent.');
else
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% use the classifier
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[X,Y] = collectPartitions(data,CV,selectInOneTrialF);
disc = CV.classifier.weights' * X + repmat(CV.classifier.bias,1,size(X,2));
[junk,whichmax] = max(disc);
classNames = unique(Y);
classes = classNames(whichmax);
testCorrect = sum(classes == Y) / length(Y);
classifierOrClasses = classes;
fractionCorrect = testCorrect;
end
runCV runCV. Here is a complete example, including
some of the code shown above and using the functions defined above.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Making data
clear trials classes;
load_eegdata;
subject = 1
tasks = [1 2 3 4 5]
%tasks = [2 3]
trials = 1:5
trials = squeeze(eegdata(subject,tasks,trials));
% Remove EOG channel
[nr,nc] = size(trials);
for r = 1:nr
for c = 1:nc
trials{r,c} = trials{r,c}(1:6,:);
end
end
classes = repmat((1:length(tasks))',1,length(trials));
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Specify common paramters
validationRepetitions = 5;
validationFraction = 0.2;
lags = 0:16;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Raw lagged representation
clear parameterSets;
k = 0;
for nl = lags
k = k + 1;
parameterSets(k).nlags = nl;
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Run all experiments
CV.raw = runCV(trials,classes,parameterSets,...
validationFraction,validationRepetitions,...
@makeRawEEGFeatures,@LDA,@selectSamples);
save allCV CV; % Save result to file named 'allcv.mat'
function features = makeSVDModes(trials,CV,command) %%% features = makeSVDModes(trials,CV) %%% trials: ntasks x ntrials cell array of data %%% CV.parms contains %%% .nlags %%% .winSize %%% .winShift %%% features: ntasks x ntrials cell array of featurized data if nargin > 2 && strcmp(command,'make') features = CV; return; end nlags = CV.parms.nlags; winSize = CV.parms.winSize; winShift = CV.parms.winShift; features = msfize(windowize(lagize(trials,nlags),winSize,winShift));It is based on
lagize given above plus the following
function in windowize.m
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function allwindows = windowize(data,wsize,winshift)
if nargin == 2
winshift = 1;
end
[r,c] = size(data);
allwindows = cell(r,c);
for ri = 1:r
for ci = 1:c
oneTrial = data{ri,ci};
nwin = 0;
for wi = 1:winshift:size(oneTrial,2)-wsize+1
nwin = nwin + 1;
end
for w = 1:nwin
start = (w-1)*winshift+1;
windows{w} = oneTrial(:,start:start+wsize-1);
end
allwindows{ri,ci} = windows;
end
end
and the following function in msfize
function msfwindows = msfize(data)
[r,c] = size(data);
msfwindows = cell(r,c);
for ri = 1:r
for ci = 1:c
windows = data{ri,ci};
nwin = length(windows);
msfwindowsTrial = cell(1,nwin);
for w = 1:nwin
windows{w} = windows{w} - repmat(mean(windows{w},2),1,size(windows{w},2));
[u,s] = mnf2(windows{w});
u = s';
for col = 1:size(u,2)
if u(1,col) < 0
u(:,col) = u(:,col) * -1;
end
end
msfwindowsTrial{w} = u;
end
msfwindows{ri,ci} = msfwindowsTrial;
end
end
which depends on mnf2.m:
function [phi,psi] = mnf2(X) % function [psi,phi] = mnf(X) % X - an p x N data matrix (N is number of samples) % % Out: % psi - the mixing coefficients (pxp) % phi - basis p x N % % Example: % s = sin(0:99); % n = [zeros(1,50) rand(1,50)]; % M = [1 0.5; 0.2 0.8]; % X = M * [s;n]; % subplot(2,1,1); % plot(X'); % subplot(2,1,2); % [psi,phi] = mnf2(X); % plot(phi'); [p,N] = size(X); dX = X(:,1:end-1) - X(:,2:end); [psi,D] = eig(X*X', dX*dX'); phi = psi' * X; psi = psi'; % to agree with output of mnf.m
We must also write a new selection function to deal with windows of basis vectors.
function [X,Y] = selectModes(oneTrial,indices,class,CV)
if ~iscell(oneTrial)
error('selectModes must be applied to cella array, but it is not.');
else
X = [];
Y = [];
modespan = CV.parms.firstMode : CV.parms.firstMode + ...
CV.parms.nModes - 1;
for w = indices
modes = oneTrial{w}(:,modespan);
modes = reshape(modes,size(modes,1)*size(modes,2),1);
X = [X modes];
Y = [Y class];
end
end
Now, put it all together as before, still using LDA to classify, and we can run this experiment by doing the following.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% MSF basis vector representation
clear parameterSets;
k = 0;
for winSize = winSizes
for nl = lags
for nModes = 1:nchannels*(nl+1)
for firstMode = 1:(nchannels*(nl+1))-nModes+1
k = k + 1;
parameterSets(k).nlags = nl;
parameterSets(k).winSize = winSize;
parameterSets(k).winShift = winShift;
parameterSets(k).firstMode = firstMode;
parameterSets(k).nModes = nModes;
end
end
end
end
%%% Run all experiments
CV.msf = runCV(trials,classes,parameterSets,...
validationFraction,validationRepetitions,...
@makeMSFModes,@LDA,@selectModes);
save allCV CV