Feature Selection Demo
Contents

Create synthetic data
Run AdaBoost on Data
Create synthetic data
numPoints = 300;
r1 = 10;                % radius of the class-1 ring
r2 = 5;                 % radius of the class-2 ring
noiseRatio = 0.3;

% Per-class coordinate noise, uniform in [-noiseRatio*r, noiseRatio*r]
nc1x1 = r1 * (2 * noiseRatio * rand(numPoints/2,1) - noiseRatio);
nc1x2 = r1 * (2 * noiseRatio * rand(numPoints/2,1) - noiseRatio);
nc2x1 = r2 * (2 * noiseRatio * rand(numPoints/2,1) - noiseRatio);
nc2x2 = r2 * (2 * noiseRatio * rand(numPoints/2,1) - noiseRatio);

% Each class is a noisy circle sampled at random angles
angs1 = 2*pi*rand(numPoints/2, 1);
angs2 = 2*pi*rand(numPoints/2, 1);
%extraNoise = r1 * (2 * noiseRatio * rand(numPoints/2,1) - noiseRatio);
extraNoise = zeros(numPoints/2, 1);

x1 = [r1 * cos(angs1) + nc1x1; r2 * cos(angs2) + nc2x1];
x2 = [r1 * sin(angs1) + nc1x2 + extraNoise; r2 * sin(angs2) + nc2x2 + extraNoise];

% feature 3 is a totally unrelated feature
x3 = r1 * rand(length(x1), 1);
%x3 = [ones(numPoints/2,1) ; 2*ones(numPoints/2,1)];

x = [x1 x2 x3];
y = [ones(numPoints/2,1) ; 2*ones(numPoints/2,1)];

c1 = y == 1;
c2 = y == 2;

figure(1); clf; hold on;
scatter(x1(c1), x2(c1), 'b', 'o', 'markerfacecolor', 'blue');
scatter(x1(c2), x2(c2), 'b', '+', 'markerfacecolor', 'blue');
title('Original');

numClasses = 2;
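Since x3 is drawn uniformly at random, independently of the labels, no single threshold on it should classify much better than chance. As a quick illustration (not part of the original demo), the following sketch sweeps candidate thresholds over x3 and reports the best achievable error:

% Sanity check (illustrative addition): a threshold on the unrelated
% feature x3 should do no better than chance (error ~ 0.5).
bestErr = 0.5;
for t = linspace(0, r1, 50)
    pred = 1 + (x3 >= t);                    % predict class 1 or 2
    err  = mean(pred ~= y);
    bestErr = min([bestErr, err, 1 - err]);  % allow flipped polarity
end
fprintf('Best single-threshold error on x3: %.3f (chance = 0.5)\n', bestErr);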

Run AdaBoost on Data
stump = BestDecisionStumpClassifier(numClasses);
adacl = AdaBooster(stump);

for i = 1:6
    [adacl, learnErr] = learn(adacl, x, y, i);
    fprintf('Error %f\n', learnErr);

    outs = computeOutputs(adacl, x);
    fprintf('Misclassified %d / %d\n', sum(y ~= outs), numPoints);

    c1c = y == 1 & y == outs;   % class 1, correctly classified
    c1w = y == 1 & y ~= outs;   % class 1, misclassified
    c2c = y == 2 & y == outs;   % class 2, correctly classified
    c2w = y == 2 & y ~= outs;   % class 2, misclassified
    fprintf('missed Class 1 : %d / %d\n', sum(c1w), sum(c1c + c1w));
    fprintf('missed Class 2 : %d / %d\n', sum(c2w), sum(c2c + c2w));
    fprintf('-----\n');
end

featuresRank = getRankedFeatures(adacl);
% Notice that feature 3 is not even used
fprintf('features used %d / 3\n', length(featuresRank));
for i = 1:length(featuresRank)
    fprintf('feature %d : score %f\n', featuresRank(i).id, ...
        featuresRank(i).score);
end
Decision tree for classification
1  if x1<6.26589 then node 2 elseif x1>=6.26589 then node 3 else 1
2  class = 2
3  class = 1
Error 0.320000
Misclassified 96 / 300
missed Class 1 : 96 / 150
missed Class 2 : 0 / 150
-----
Decision tree for classification
1  if x1<-5.73132 then node 2 elseif x1>=-5.73132 then node 3 else 1
2  class = 1
3  class = 1
Error 0.320000
Misclassified 96 / 300
missed Class 1 : 96 / 150
missed Class 2 : 0 / 150
-----
Decision tree for classification
1  if x1<-6.14224 then node 2 elseif x1>=-6.14224 then node 3 else 2
2  class = 1
3  class = 2
Error 0.180000
Misclassified 54 / 300
missed Class 1 : 54 / 150
missed Class 2 : 0 / 150
-----
Decision tree for classification
1  if x2<-6.39791 then node 2 elseif x2>=-6.39791 then node 3 else 1
2  class = 1
3  class = 1
Error 0.180000
Misclassified 54 / 300
missed Class 1 : 54 / 150
missed Class 2 : 0 / 150
-----
Decision tree for classification
1  if x2<-6.39791 then node 2 elseif x2>=-6.39791 then node 3 else 2
2  class = 1
3  class = 2
Error 0.100000
Misclassified 30 / 300
missed Class 1 : 30 / 150
missed Class 2 : 0 / 150
-----
Decision tree for classification
1  if x2<6.35024 then node 2 elseif x2>=6.35024 then node 3 else 1
2  class = 1
3  class = 1
Error 0.100000
Misclassified 30 / 300
missed Class 1 : 30 / 150
missed Class 2 : 0 / 150
-----
features used 2 / 3
feature 1 : score 1.766190
feature 2 : score 1.705226
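getRankedFeatures belongs to this demo's own AdaBooster class, whose internals are not shown here. One plausible scoring rule, sketched below purely as an assumption, credits each feature with the round coefficient of every stump that splits on it; the rounds struct and its fields are hypothetical names with made-up values, not the demo's real API:

% Hypothetical sketch of what getRankedFeatures might compute (an
% assumption -- AdaBooster's internals are not shown in this demo).
% 'rounds', 'featureId', 'alpha' and their values are illustrative only.
rounds = struct('featureId', {1, 1, 2, 2, 1, 2}, ...
                'alpha',     {0.38, 0.41, 0.35, 0.40, 0.45, 0.42});
scores = zeros(1, 3);
for t = 1:numel(rounds)
    f = rounds(t).featureId;                 % feature the t-th stump uses
    scores(f) = scores(f) + rounds(t).alpha; % credit its round coefficient
end
% A feature no stump ever selects (here x3) keeps score 0 and is
% dropped from the ranking, matching 'features used 2 / 3' above.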
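For reference, if AdaBooster follows the standard discrete AdaBoost scheme (an assumption; its update rule is not shown here), the first round's coefficient can be recovered from the printed round-1 error, since the sample weights are still uniform at that point:

% Illustrative only: standard discrete AdaBoost round coefficient,
% evaluated at the round-1 training error printed above (0.32). This
% assumes AdaBooster uses the Freund-Schapire update.
epsilon = 0.32;
alpha   = 0.5 * log((1 - epsilon) / epsilon);   % ~0.3769
fprintf('alpha for eps = %.2f: %.4f\n', epsilon, alpha);

Under that assumption, the per-feature scores above (1.766190 for x1, 1.705226 for x2) would plausibly be sums of such coefficients over the stumps that split on each feature.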