|
目的:应用R语言逐步回归的常用方法包括MASS包中的stepAIC函数及vegan包中的ordistep函数等(RDA分析
选择单一响应变量),其中stepAIC函数的选择标准为AIC(赤池信息准则)、ordistep函数的选择标准为决定系数
或解释变量显著度。 BIC(贝叶斯信息准则)与HQIC(Hannan-Quinn信息准则)也是重要的模型优度判别指标,但
未发现常用的R软件包集成这两种方法。为使用方便,对这两种准则的逐步回归进行集成。
AIC、BIC、HQIC的各自特点。
引用文章
https://www.shangyexinzhi.com/article/5117452.html
AIC
AIC ( Akaike Information Criterion ),赤池信息准则,其计算公式如下:
其中, n 为样本个数, k 为参数个数。
在回归模型中, k 即为自变量个数,但在时序预测模型中, k 为阶数。
BIC
BIC ( Bayesian Information Criterion ),贝叶斯信息准则,其计算公式如下:
其中, n 为样本个数, k 为参数个数。
HQIC
HQIC ( Hannan-Quinn Information Criterion ), HQ 信息准则,其计算公式如下:
其中, n 为样本个数, k 为参数个数。
这三个指标,都是在误差项 SSE 的基础上,增加了惩罚项(参数个数)。模型越复杂,指标会变得越大。但总体上,要求信息损失准则指标,越小越好。
一般情况下, BIC 适合大样本量, AIC 适合小样本量,而 HQIC 适合中等样本量。
逐步回归函数代码及试用
#####################################
#####################################
####################数据形式
> head(round(xdata,3))
env1 env2 env3 env4 env5 env6 env7 env8
m_community 1 -0.784 -0.127 -0.257 -0.122 -0.161 -0.921 -0.725 1.070
m_community 2 -0.784 -0.127 -0.257 -0.181 -0.415 -0.883 -0.718 0.950
m_community 3 -0.784 -0.127 -1.908 -1.260 -1.918 1.952 -0.778 -0.927
m_community 4 0.582 -0.127 -0.301 0.167 -0.375 -1.097 -0.137 0.551
m_community 5 -0.784 -0.127 -1.604 -1.210 -1.634 1.620 -0.778 -0.835
m_community 6 -0.711 -0.127 -0.301 -0.065 -0.491 -0.804 -0.621 0.989
> head(ydata)
[1,] -1.5542772
[2,] -1.5459459
[3,] -1.5542772
[4,] -1.5542772
[5,] -1.5542772
[6,] -0.4765894
#####################################BIC向前选择逐步回归
BIC_forward=function(xdata,ydata)
{
xdata=as.data.frame(xdata)
BIC=c(Inf)
n=length(ydata)
j=1
for(i in 1:ncol(xdata))
{
BICi=n*log(sum((summary(lm(ydata~xdata[,i]))$residuals)^2)/n)+log(n)*(j+1)
BIC=c(BIC,BICi)
BIC
minBIC=which.min(BIC)-1
}
print(paste("Step",j," BIC=",BIC[which.min(BIC)]," colname_",colnames(xdata)[minBIC]))
xdatanew=xdata[,minBIC]
xdata=xdata[,-minBIC]
for (j in 2:ncol(xdata))
{
BIC=c(Inf)
for(i in 1:ncol(xdata))
{
xdatanew_=cbind(xdatanew,xdata[,i])
BICi=n*log(sum((summary(lm(ydata~xdatanew_))$residuals)^2)/n)+log(n)*(j+1)
BIC=c(BIC,BICi)
BIC
minBIC=which.min(BIC)-1
}
print(paste("Step",j," BIC=",BIC[which.min(BIC)]," colname_",colnames(xdata)[minBIC]))
xdatanew=cbind(xdatanew,xdata[,minBIC])
xdata=xdata[,-minBIC]
}
}
AIC_forward=function(xdata,ydata)
{
xdata=as.data.frame(xdata)
AIC=c(Inf)
n=length(ydata)
j=1
for(i in 1:ncol(xdata))
{
AICi=n*log(sum((summary(lm(ydata~xdata[,i]))$residuals)^2)/n)+2*(j+1)
AIC=c(AIC,AICi)
AIC
minAIC=which.min(AIC)-1
}
print(paste("Step",j," AIC=",AIC[which.min(AIC)]," colname_",colnames(xdata)[minAIC]))
xdatanew=xdata[,minAIC]
xdata=xdata[,-minAIC]
for (j in 2:ncol(xdata))
{
AIC=c(Inf)
for(i in 1:ncol(xdata))
{
xdatanew_=cbind(xdatanew,xdata[,i])
AICi=n*log(sum((summary(lm(ydata~xdatanew_))$residuals)^2)/n)+2*(j+1)
AIC=c(AIC,AICi)
AIC
minAIC=which.min(AIC)-1
}
print(paste("Step",j," AIC=",AIC[which.min(AIC)]," colname_",colnames(xdata)[minAIC]))
xdatanew=cbind(xdatanew,xdata[,minAIC])
xdata=xdata[,-minAIC]
}
}
HQIC向前选择逐步回归
<span style=\".\"text-wrap:\">
HQIC_forward=function(xdata,ydata)
{
xdata=as.data.frame(xdata)
HQIC=c(Inf)
n=length(ydata)
j=1
for(i in 1:ncol(xdata))
{
HQICi=n*log(sum((summary(lm(ydata~xdata[,i]))$residuals)^2)/n)+log(log(n))*(j+1)
HQIC=c(HQIC,HQICi)
HQIC
minHQIC=which.min(HQIC)-1
}
print(paste("Step",j," HQIC=",HQIC[which.min(HQIC)]," colname_",colnames(xdata)[minHQIC]))
xdatanew=xdata[,minHQIC]
xdata=xdata[,-minHQIC]
for (j in 2:ncol(xdata))
{
HQIC=c(Inf)
for(i in 1:ncol(xdata))
{
xdatanew_=cbind(xdatanew,xdata[,i])
HQICi=n*log(sum((summary(lm(ydata~xdatanew_))$residuals)^2)/n)+log(log(n))*(j+1)
HQIC=c(HQIC,HQICi)
HQIC
minHQIC=which.min(HQIC)-1
}
print(paste("Step",j," HQIC=",HQIC[which.min(HQIC)]," colname_",colnames(xdata)[minHQIC]))
xdatanew=cbind(xdatanew,xdata[,minHQIC])
xdata=xdata[,-minHQIC]
}
}
##################################
#################################应用函数观察效果
#################################
> BIC_forward(xdata,ydata)
[1] "Step 1 BIC= -10.3763899820553 colname_ env7"
[1] "Step 2 BIC= -7.74189091052095 colname_ env1"
[1] "Step 3 BIC= -1.96929274332684 colname_ env4"
[1] "Step 4 BIC= 1.29691565598463 colname_ env6"
[1] "Step 5 BIC= 7.78015915679914 colname_ env5"
[1] "Step 6 BIC= 14.9842231232145 colname_ env3"
[1] "Step 7 BIC= 22.1854867468494 colname_ env2"
> AIC_forward(xdata,ydata)
[1] "Step 1 AIC= -20.8476282641888 colname_ env7"
[1] "Step 2 AIC= -23.4487483337212 colname_ env1"
[1] "Step 3 AIC= -22.9117693075938 colname_ env4"
[1] "Step 4 AIC= -24.8811800493491 colname_ env6"
[1] "Step 5 AIC= -23.6335556896014 colname_ env5"
[1] "Step 6 AIC= -21.6651108642528 colname_ env3"
[1] "Step 7 AIC= -19.6994663816846 colname_ env2"
> HQIC_forward(xdata,ydata)
[1] "Step 1 HQIC= -20.8895963997022 colname_ env7"
[1] "Step 2 HQIC= -23.5117005369913 colname_ env1"
[1] "Step 3 HQIC= -22.9957055786207 colname_ env4"
[1] "Step 4 HQIC= -24.9861003881326 colname_ env6"
[1] "Step 5 HQIC= -23.7594600961416 colname_ env5"
[1] "Step 6 HQIC= -21.8119993385497 colname_ env3"
[1] "Step 7 HQIC= -19.8673389237382 colname_ env2"
在AIC、BIC、HQIC下第一步引入的因子都为环境变量env7,BIC的惩罚因子敏感性高于HQIC高于AIC,因此
BIC前向选择逐步回归在第一步的BIC最小为最优模型。而HQIC与AIC均在Step4时最小,模型解释变量包含
env7、env1、env4与env6。
Archiver|手机版|科学网 ( 京ICP备07017567号-12 )
GMT+8, 2024-10-20 03:00
Powered by ScienceNet.cn
Copyright © 2007- 中国科学报社