|
1. 载入各种包
source("http://bioconductor.org/biocLite.R")
biocLite("affy")
biocLite("hgu133plus2probe")
biocLite("hgu133plus2.db")
biocLite("hgu133plus2cdf")
biocLite("DBI")
library(affy)
library(Biobase)
library(BiocGenerics)
library(affy)
library(Biobase)
library("AnnotationDbi")
library("hgu133plus2cdf")
library("hgu133plus2probe")
##library(help = "hgu133plus2probe")
## library(help = "AnnotationDbi")
library("hgu133plus2cdf")
library("hgu133plus2.db")
library(DBI)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(limma)
2. 数据读入和数据标准化
setwd(getwd())
data <- ReadAffy()
dim(data)
pdf("Before Normalization.pdf")
a<- boxplot(data,col=c("mediumturquoise","mediumturquoise","mediumturquoise","blue","blue","blue","hotpink","hotpink","hotpink"),main="Before Normalization",ylab = "expression value",las=1,font.lab=2)
print(a)
dev.off()
eset<- rma(data)
mydata <- data.frame(exprs(eset))
head(mydata)
pdf("After Normalization.pdf")
b<- boxplot(mydata,col=c("mediumturquoise","mediumturquoise","mediumturquoise","blue","blue","blue","hotpink","hotpink","hotpink"),main="Before Normalization",ylab = "expression value",las=1,font.lab=2)
print(b)
dev.off()
3. RMA进行
probsets <-as.vector(rownames(mydata))
alldata_symbol <- mget(probsets, hgu133plus2SYMBOL,ifnotfound = NA)
all <- as.matrix(alldata_symbol)
all_symbol <- all[which(is.na(all[,1]) == F),]
length(all_symbol) ##41923
all_symbol <- as.matrix(all_symbol)
alldata <- mydata[rownames(all_symbol),]
dim(alldata)
alldata_symbol <- as.matrix(data.frame(alldata,all_symbol))
dim(alldata_symbol) #41923 ##9
write.table(alldata_symbol,file = "alldata_symbol.txt",sep = "t")
alldata_symbol<- read.table(file = "alldata_symbol.txt")
head(alldata_symbol)
alldata_symbol_mean<- aggregate(alldata_symbol, by=list(symbol = alldata_symbol[,9]), mean)
write.table(alldata_symbol_mean,file = "alldata_symbol_mean.txt",sep = "t")
> groupA_data <- alldata_symbol_mean[,1:3]
> colnames(groupA_data)<- c("C1","C2","C3")
> rownames(groupA_data)<-rownames(alldata_symbol_mean)
> write.table(groupA_data,file = "groupA_data.txt")
> groupB_data <- alldata_symbol_mean[,4:6]
> rownames(groupB_data)<-rownames(alldata_symbol_mean)
> colnames(groupB_data)<- c("T1","T2","T3")
> write.table(groupB_data,file = "groupB_data.txt")
> groupC_data <- alldata_symbol_mean[,7:9]
> colnames(groupC_data)<- c("M1","M2","M3")
> rownames(groupC_data)<-rownames(alldata_symbol_mean)
> write.table(groupC_data,file = "groupC_data.txt")
data <- as.matrix(data[,1:11])
design <- model.matrix(~ -1+factor(c(1,1,1,1,1,1,2,2,2,2,2)))
colnames(design) <- c("group1", "group2","group3")
fit <- lmFit(data, design)
contrast.matrix <- makeContrasts(group2-group1,levels=design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
x <- topTable(fit2, coef=1, adjust="BH", sort.by="P", number=50000);
y <- x[x$adj.P.Val< 0.05 & (x$logFC > 1 | x$logFC < -1),]; y; print("Number of genes in this list:"); length(y$ID)
DEG_groupB1 <- y[,c(1,2,5,6)]
write.table(DEG_groupB1,file = "DEG_groupB1.txt",sep = "t")
Archiver|手机版|科学网 ( 京ICP备07017567号-12 )
GMT+8, 2024-10-10 01:34
Powered by ScienceNet.cn
Copyright © 2007- 中国科学报社