博文

mysvmtrian

已有 1922 次阅读 2019-10-23 09:44 |个人分类:代码分享|系统分类:科研笔记| SVM, SMO

废话不说，上代码：

clear all;

close all;

clc;

%-------------------------------------------------

% set the initial value;

%-------------------------------------------------

N=300; %the training data set size; 300个训练点；

NN=2000; %the number of validation data set; 2000个验证点；

%------------------------------------------------

%produce the training set

%------------------------------------------------

fprintf('Producing the training set...\n');

fprintf('-----------------------------------------\n');

%运算中我们需要知道程序进行到哪一步了，这就需要在代码中嵌入这些指示性文字；

r=10; %半径=10；

wth=6; %宽度=6；

dis=-6.5; %距离=-6；

si=0.085;

kkk1=1;

kkk2=1;

%构建双月形状，设定半径、宽度、距离、以及核函数的方差，这个方差不是一蹴而就的，可以在第一次运算后，通过统计支持向量之间的最大距离和支持向量数目运算得出；

for i=1:NN; %at the beginning of every round, to produce the training set in random; 看了Yanbo Xue的程序，他是一次性产生3000个数据点，

if rand>0.98; %make sure the points occur in random. and half in half for each class;

theta=pi*(1*rand-0); %set the angle;

lenth=r+wth*((1*rand-0)-0.5); %the range;

h(i)=lenth*cos(theta); %the X axis;

ve(i)=lenth*sin(theta); %the Y axis;

y(i)=-1; %the expected response; class 1;

sample2(1,kkk2)=h(i);

sample2(2,kkk2)=ve(i);

kkk2=kkk2+1;

else

beta=-pi*(rand*1);

len_2=r+wth*((rand*1)-0.5);

h(i)=len_2*cos(beta)+r;

ve(i)=len_2*sin(beta)-dis;

y(i)=1; %class 2;

sample1(1,kkk1)=h(i);

sample1(2,kkk1)=ve(i);

kkk1=kkk1+1;

end

%这里是产生双月形状的代码，主要到数量是N+NN，也就是说训练集和验证集产生于同一个模型；

fprintf('Normalization...\n');

fprintf('-----------------------------------------\n');

%首先去掉均值；

miu_x=mean(h); %取得x1的均值；

miu_y=mean(ve); %取得x2的均值；

h=h-miu_x*ones(1,NN); %减去均值；相当于减去了直流分量；

ve=ve-miu_y*ones(1,NN); %减去均值；

max_h=max(abs(h)); %取得绝对值的最大值；

max_ve=max(abs(ve)); %陈述事实

h=h./max_h; %除以这个数；

ve=ve./max_ve;

sample1(1,:)=(sample1(1,:)-miu_x)./max_h;

sample2(1,:)=(sample2(1,:)-miu_x)./max_h;

sample1(2,:)=(sample1(2,:)-miu_y)./max_ve;

sample2(2,:)=(sample2(2,:)-miu_y)./max_ve;

tr_set=[h;ve;y]; %组合成一个训练集，响应没有变化；

seq_tr_set=randperm(NN); %重新洗牌；

C=[100000 1000000;1000000 100];

for i=1

[a b X]=INSVM1(C(i,1),C(i,2),N,tr_set,si,seq_tr_set);

i=1;

l=0;

while i<=N

if a(i)>0

l=l+1;

SV(l)=a(i);

y_sv(l)=X(3,i);

x_sv(:,l)=X(1:2,i);

ptr(l)=i;

end

i=i+1;

end

NS=l; %the number of support vectors.

err=0;

FV=zeros(1,NN);

for j=1:NN

FV(j)=0;

for l=1:NS

FV(j)=FV(j)+SV(l)*y_sv(l)*K(tr_set(1:2,j),x_sv(:,l),si);

end

FV(j)=FV(j)+b;

if (tr_set(3,j)*FV(j))>=1

elseif (tr_set(3,j)*FV(j))<0

err=err+1;

end

k1=1;

k2=1;

for i=1:NN

if tr_set(3,i)==1

svv1(:,k1)=[FV(i);i];

k1=k1+1;

else

svv2(:,k2)=[FV(i);i];

k2=k2+1;

end

figure

plot(svv1(2,:),svv1(1,:),'k*')

hold on

plot(svv2(2,:),svv2(1,:),'b*')

%now the validation is complete.

%start the decision curve drawing;画出决策超平面；

fprintf('Draw the judgement curve ...\n');

fprintf('------------------------------------\n');

figure;

% plot(tr_set(1,:),tr_set(2,:),'*');

plot(sample1(1,:),sample1(2,:),'k*');

hold on

plot(sample2(1,:),sample2(2,:),'b*')

hold on;

plot(x_sv(1,:),x_sv(2,:),'ko');

for il=1:400;

xl(il)=-1+il/200;

ul=10;

pl=2;

for jl=1:400;

yl(jl)=-1+jl/200;

Cur=[xl(il) yl(jl)]';

oo=0;

for l=1:NS

oo=oo+SV(l)*y_sv(l)*K(Cur,x_sv(:,l),si);

end

oo=oo-b;

zl=abs(oo);

if zl<ul;

ul=zl;

pl=yl(jl);

end

pl_l(il)=pl;

end

hold on;

plot(xl,pl_l,'r*');

figure

for i=1:N

if X(3,i)==-1

plot(X(1,i),X(2,i),'b*')

hold on

else

plot(X(1,i),X(2,i),'k*')

hold on

end

hold on

plot(xl,pl_l,'r*');

clear SV y_sv x_sv ptr svv1 svv2 xl yl zl oo pl pl_1

end

function [a b mix_tr_set]=INSVM1(C2,C1,N,tr_set,si,seq_tr_set)

%C2表示正类的惩罚系数，C1表示负类的惩罚系数，N表示训练样本的大小（N小于数据大小）

%，tr_set表示所有数据，si表示高斯核的方差

% seq_tr_set=randperm(length(tr_set(1,:)));

mix_tr_set=tr_set(:,seq_tr_set(1:N)); %注意到从2300个点中取得300个；

h=mix_tr_set(1,:); %第一行是x轴数据，或x1；

ve=mix_tr_set(2,:);

y=mix_tr_set(3,:); %注意到响应是跟着数据点走的；

%tr_set_1=[h;ve;y];

X=[h;ve]; %某些时候只需要用到两列，但是响应还是随着数据点走的；

fprintf('Now we start the SVM by SMO algorithm ...\n');

fprintf('-----------------------------------------\n');

%从这里开始，我们准备构建SMO算法；

%------------------------------------------------------

%set the SVM initial value;

%------------------------------------------------------

a=zeros(1,N); %the first initial value; 这就是著名的拉格朗日算子，每个数据点都有一个；

b=0; %bias. 偏置；

% C2=100; %the boundry; 惩罚算子；

% C1=100;

FX=zeros(1,N); %f(x(i)) i=1toN, since {a(i)}=0, so f(x)=0; f(x)=w'*x-b; or f(x)=SUM ai*yi*K(xi,xj)-b=0; f(xi)函数，一般有f(xi)=w'×xi-b；这里是f(xi)=sum ai*yi*K(xi,xj)-b; %判决函数

E=FX-y; %y: the expected response; 这是函数f（xi）与期望响应（或者叫做标号）的差异；

efxt=a; %efxt is the gap distance related parameters; 这个是对于那些算子=C的点对应的间隔；

eps=0.007; %threshold for stopping judgement; 阈值，（原目标-对偶目标）/（原目标+1）的比值对应停机条件，小于这个阈值，运算停止；

times=0; %at the start, times=0;the number of 外循环运算多少次了；

presv=0; %at the beginning, suppose there is no 相当于flag，表示是否找到不满足KKT条件的支持向量；

Gram=eye(N,N); %build the gram matrix, for recording the calculation results;

ot=0; %指示是否运算数目太多，迭代时间太长了；

totaltimes=0; %how many times of calculation operated? 一共运行多少次两算子计算了；

in=1; %set the initial value of in;in start from 1; 为了区分违反KKT条件的三种算子，有三种指示，a=0；a=C；C>a>0;

ic=1;

iii=0;

ker=zeros(N,N);

for i=1:N

for j=1:N

ker(i,j)=K(X(:,i),X(:,j),si);

end

ratio=1;

aaa=0;

while(ratio>eps)

%select the first point;

%--------------------------------------------------------------------------

for i=1:N

if y(i)==1;

Ck(i)=C2;

else

Ck(i)=C1;

end

aaa=aaa+1;

if (times==0)

i1=1;

else

while (in<=N)

if (a(in)>0) && (a(in)<Ck(in)&&(y(in)*FX(in)>1.01)||(y(in)*FX(in)<0.99)) %here, C is 100.

i1=in;

presv=1;

end

in=in+1;

if (presv)

break;

end

if presv==0

in=1;

while (ic<=N)

if (a(ic)==Ck(ic) && y(ic)*FX(ic)>1.1)||(a(ic)==0 && y(ic)*FX(ic)<.9)

i1=ic;

presv=1;

end

ic=ic+1;

if (presv)

break;

end

if (presv==0)

in=1;

ic=1;

i1=floor(rand*N)+1;

end

presv=0;

end

max_i=1;

i2old=i1;

if (times==0)

if i2old<N

i2=i2old+1;

i2old=i2;

end

else

min_E=E(i1);

max_E=E(i1);

if (E(i1)>=0)

for i=1:N

if (a(i)>0) && (a(i)<Ck(i))

if (E(i)<min_E)%&&(i~=i2old_x)

min_E=E(i);

i2=i;

end

else

for i=1:N

if (a(i)>0) && (a(i)<Ck(i))

if (E(i)>max_E)

max_E=E(i);

i2=i;

end

totaltimes=totaltimes+1;

%--------------------------------------------------------------------------

x1=X(:,i1);

x2=X(:,i2);

y1=y(i1);

y2=y(i2);

if y1==1

Cy1=C2;

else

Cy1=C1;

end

if y2==1

Cy2=C2;

else

Cy2=C1;

end

a1old=a(i1);

a2old=a(i2);

% if y2==1&&y1==-1

% L=max(0,a2old-a1old);

% H=min(C2,C1-a1old+a2old);

% elseif y2==-1&&y1==1

% L=max(0,a2old-a1old);

% H=min(C1,C2-a1old+a2old);

% elseif y2==1&&y1==1

% L=max(0,a1old+a2old-C2);

% H=min(C2,a1old+a2old);

% elseif y2==-1&&y1==-1

% L=max(0,a1old+a2old-C1);

% H=min(C1,a1old+a2old);

% end

if(y1~=y2)

L=max(0,a2old-a1old);

H=min(Cy2,Cy1-a1old+a2old);

else

L=max(0,a1old+a2old-Cy1);

H=min(Cy1,a1old+a2old);

end

if times~=0 && L>=H %%a1和a2构成的斜线是否经过方盒子

else

K11=1;

K22=1;

K12=ker(i1,i2);

E1=E(i1);

E2=E(i2);

k=K11+K22-2*K12;

if k==0

k=0.01;

end

a2new=a2old+y2*(E1-E2)/k;

if (a2new>H)

a2new=H;

else

if(a2new<L)

a2new=L;

end

a1new=a1old+y1*y2*(a2old-a2new);

a1new=max(0,a1new);

a(i1)=a1new;

a(i2)=a2new;

bold=b;

a1e=y1*(a1new-a1old);

a2e=y2*(a2new-a2old);

b1new=-E1-a1e-a2e*K12+bold;

b2new=-E2-a1e*K12-a2e+bold;

if a1new>0 &&y1==1&& a1new<Cy1

b=b1new;

else

if a2new>0 && a2new<Cy2

b=b2new;

else

if (a1new==0||a1new==Cy1)&&(a2new==0||a2new==Cy2)&&(L~=H)

b=(b1new+b2new)/2;

end

%--------------------------------------------------------------------------

if totaltimes>100*N

fprintf('how many knives?\n');

fprintf('-----------------------------------------\n');

break;

end

times=times+1;

FX=zeros(1,N);

for i=1:N;

for l=1:N

FX(i)=FX(i)+a(l)*y(l)*ker(i,l);

end

FX=FX+b*ones(1,N);

E=FX-y;

for i=1:N

if y(i)==1

efxt(i)=C2*max(0,1-y(i)*FX(i));

else

efxt(i)=C1*max(0,1-y(i)*FX(i));

end

W=0;

AAYYK=0;

for i=1:N

if (a(i)>0)

for j=1:N

if (a(j)>0)

AAYYK=AAYYK+a(i)*a(j)*y(i)*y(j)*ker(i,j);

end

suma=sum(a);

sume=sum(efxt);

W=suma-0.5*AAYYK;

ratio=((suma-2*W+sume)/(suma-W+sume+1));

fprintf('Now we can see the difference between original objective and dual one...\n');

fprintf('Ratio = %f\n',ratio);

iii=iii+1;

ratio_out(iii)=ratio;

end

end %this end compare to while(1);

转载本文请联系原作者获取授权，同时请注明本文来自强幸子科学网博客。
链接地址：https://blog.sciencenet.cn/blog-3421825-1203103.html

上一篇：libsvm Usage 参考libsvm————readme
下一篇：高斯核密度估计方法代码

收藏 IP: 219.239.227.*| 热度|

当前推荐数：0

该博文允许注册用户评论请点击登录评论 (0 个评论)

数据加载中...

返回顶部

强幸子

扫一扫，分享此博文

oldnewbird的个人博客分享 http://blog.sciencenet.cn/u/oldnewbird

博文

mysvmtrian

当前推荐数：0

该博文允许注册用户评论请点击登录评论 (0 个评论)

强幸子

全部作者的其他最新博文

全部精选博文导读

相关博文

oldnewbird的个人博客分享 http://blog.sciencenet.cn/u/oldnewbird

博文

mysvmtrian

当前推荐数：0

该博文允许注册用户评论 请点击登录 评论 (0 个评论)

强幸子

全部作者的其他最新博文

全部精选博文导读

相关博文

该博文允许注册用户评论请点击登录评论 (0 个评论)