||
entrezID_text <- "4312 8318 10874 55143 55388 991 6280 2305 9493 1062 3868 4605 9833 9133 6279 10403 8685 597 7153 23397" entrezID <- read.table(text=entrezID_text, header=F) head(entrezID)
V1 1 4312 2 8318 3 10874 4 55143 5 55388 6 991
entrezID <- entrezID$V1 head(entrezID)
[1] 4312 8318 10874 55143 55388 991
# 这里的ENTREZ ID是从clusterProfiler里面提取是,是人的, # 所以用了人的注释库, org.Hs.eg.db# 这个库半年左右更新一次,也有可能漏更新,还是自己准备数据的靠谱 library(org.Hs.eg.db)
# readable=T: 原文档无这个参数,使用的是setReadble函数 MF <- enrichGO(entrezID, "org.Hs.eg.db", ont = "MF", keytype = "ENTREZID", pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.1, readable=T) head(summary(MF))
result <- simplify(MF, cutoff=0.7, by="p.adjust", select_fun=min) # 去除前 dim(MF) # [1] 367 9 # 去除后 dim(result) [1] 142 9
dotplot(result, showCategory = 10)
enrichMap(result, vertex.label.cex=1.2, layout=igraph::layout.kamada.kawai)
# geneList为一个vector,每个元素的名字为基因名,值为FoldChange,用于可视化点。 # > str(geneList) # Named num [1:12495] 4.57 4.51 4.42 4.14 3.88 ... - attr(*, "names")= chr [1:12495] "4312" "8318" "10874" "55143" ... # 这个geneList怎么获得的,会在后面的GSEA分析时提到 cnetplot(result, foldChange=geneList)
awk 'BEGIN{OFS=FS="\t"}{if(FNR==1) print "Gene\tTerm"; else {split($8,geneL,"/"); for(i in geneL) print geneL[i],$2}}' MF | head Gene Term PRDX6 cell adhesion molecule binding MFGE8 cell adhesion molecule binding FSCN1 cell adhesion molecule binding ATXN2L cell adhesion molecule binding YWHAZ cell adhesion molecule binding CTNNA2 cell adhesion molecule binding ADAM15 cell adhesion molecule binding LDHA cell adhesion molecule binding PKM cell adhesion molecule binding
# clusterProfiler3.4.4版本是没有readable参数的,原文档有 kk <- enrichKEGG(entrezID, organism="hsa", pvalueCutoff=0.05, pAdjustMethod="BH", qvalueCutoff=0.1)
setReadable(kk, "org.Hs.eg.db") Error in EXTID2NAME(OrgDb, genes, keytype) : keytype is not supported... # 即便是如下操作也没有作用 a = bitr_kegg(names(geneList), fromType = 'ncbi-geneid', toType="kegg", organism="hsa") # Warning message: # In bitr_kegg(names(geneList), fromType = "ncbi-geneid", toType = "kegg", : # 0.77% of input gene IDs are fail to map... kk <- enrichKEGG(a$kegg, organism="hsa", keyType="kegg", pvalueCutoff=0.05, pAdjustMethod="BH", qvalueCutoff=0.1) setReadable(kk, org.Hs.eg.db, key) # Error in EXTID2NAME(OrgDb, genes, keytype) : keytype is not supported...
setReadable(kk, org.Hs.eg.db, keytype="ENTREZID")
if (keytype == "auto") { keytype <- x@keytype if (keytype == "UNKNOWN") { stop("can't determine keytype automatically; need to set 'keytype' explicitly...") } }
entrezID <- bitr_kegg(entrezID, fromType='ncbi-geneid', toType='kegg', organism="ath") kk <- enrichKEGG(entrezID$kegg, organism="ath", pvalueCutoff=0.05, pAdjustMethod="BH", qvalueCutoff=0.1) # 这个setreadble是可以转换成功的 result <- setReadable(kk, "org.At.tair.db", keytype="TAIR")
id_with_fc = "ID;FC 4312;2 8318;3 10874;4 55143;5 55388;6 991;7" id_with_fc <- read.table(text=id_with_fc, header=T, sep=";") id_with_fc2 <- id_with_fc[,2] names(id_with_fc2) <- id_with_fc[,1] # 排序是必须的,记住排序方式 id_with_fc2 <- sort(id_with_fc2, decreasing=T) gsecc <- gseGO(geneList=id_with_fc2, ont="CC", OrgDb=org.Hs.eg.db, verbose=F) # 昨天测试了其它数据,参数无问题。这里没有实际运行,盗用数据,每列的解释见本段开头的文章 head(as.data.frame(gsecc)) ## ID Description setSize ## GO:0031982 GO:0031982 vesicle 2880 ## GO:0031988 GO:0031988 membrane-bounded vesicle 2791 ## GO:0005576 GO:0005576 extracellular region 3296 ## GO:0065010 GO:0065010 extracellular membrane-bounded organelle 2220 ## GO:0070062 GO:0070062 extracellular exosome 2220 ## GO:0044421 GO:0044421 extracellular region part 2941 ## enrichmentScore NES pvalue p.adjust qvalues ## GO:0031982 -0.2561837 -1.222689 0.001002004 0.03721229 0.02816364 ## GO:0031988 -0.2572169 -1.226003 0.001007049 0.03721229 0.02816364 ## GO:0005576 -0.2746489 -1.312485 0.001009082 0.03721229 0.02816364 ## GO:0065010 -0.2570342 -1.222048 0.001013171 0.03721229 0.02816364 ## GO:0070062 -0.2570342 -1.222048 0.001013171 0.03721229 0.02816364 ## GO:0044421 -0.2744658 -1.310299 0.001014199 0.03721229 0.02816364 # 绘制GSEA图 gseaplot(gsecc, geneSetID="GO:0000779")
self_anno <- "ont;gene KEGG_GLYCOLYSIS_GLUCONEOGENESIS;gene1 KEGG_GLYCOLYSIS_GLUCONEOGENESIS;gene2 KEGG_GLYCOLYSIS_GLUCONEOGENESIS;gene3 KEGG_GLYCOLYSIS;gene1 KEGG_GLYCOLYSIS;gene4 KEGG_CYP;gene5" self_anno <- read.table(text=self_anno, header=T, sep=";", quote="") # 没具体看代码怎么写的,保险期间,设置跟示例一样的列名字 colnames(self_anno) <- c("ont", "gene") geneL <- c("gene1", "gene2", "gene4") # self_enrich与之前enrichGO的输出结果格式一致 self_enrich <- enricher(geneL, TERM2GENE=self_anno) # self_gsea与之前gseGO的输出结果格式一致 self_gsea <- GSEA(geneL, TERM2GENE=self_anno, verbose=F)
Archiver|手机版|科学网 ( 京ICP备07017567号-12 )
GMT+8, 2024-11-19 20:39
Powered by ScienceNet.cn
Copyright © 2007- 中国科学报社