||
随着研究的逐渐深入,我们对绘图的要求越来越高,各种之前使用的较少的图形如今追求热度和新颖程度,都开始逐渐在大文章中显现。如下图。
这是最近刚发表于Nature Ecology & Evolution中的图1b。如何绘制呢?
Thorsten Thiergart, Paloma Durán, Thomas Ellis, Nathan Vannier, Ruben Garrido-Oter, Eric Kemen, Fabrice Roux, Carlos Alonso-Blanco, Jon Ågren, Paul Schulze-Lefert & Stéphane Hacquard. Root microbiota assembly and adaptive differentiation among European Arabidopsis populations. Nature Ecology & Evolution 4, 122-131, doi:10.1038/s41559-019-1063-3 (2020).
这次的聚类加物种丰度展示让我们学习一波。之前推出了用R语言的plot绘制的教程。
但修改细节仍比较麻烦。今天更新基于ggplot2系统的教程。
这里的ggtree需要使用19年7月以后的版本,因为这以后的版本才支持将聚类结果转化为树结构。
如果你的Bioconductor版本较旧,可能一直会安装旧版ggtree。升新方法如下:
## 先卸载先前的安装控制程序remove.packages(c("BiocInstaller", "BiocManager", "BiocVersion")) ## 再安装新版程序install.packages("BiocManager") BiocManager::install(update=TRUE, ask=FALSE)
library("ggplot2")library("ggdendro") # library(remotes)library(phyloseq)library(tidyverse)library(ggtree)library( ggstance) # library(amplicon) vegan_otu = function(physeq){ OTU = otu_table(physeq) if(taxa_are_rows(OTU)){ OTU = t(OTU) } return(as(OTU,"matrix")) } vegan_tax <- function(physeq){ tax <- tax_table(physeq) return(as(tax,"matrix")) }
# 从R数据文件中读入# ps = readRDS("data/ps_liu.rds")# 从文件读取metadata = read.table("http://210.75.224.110/github/EasyAmplicon/data/metadata.tsv", header=T, row.names=1, sep="\t", comment.char="", stringsAsFactors = F) otutab = read.table("http://210.75.224.110/github/EasyAmplicon/data/otutab.txt", header=T, row.names=1, sep="\t", comment.char="", stringsAsFactors = F) taxonomy = read.table("http://210.75.224.110/github/EasyAmplicon/data/taxonomy.txt", header=T, row.names=1, sep="\t", comment.char="", stringsAsFactors = F)# 提取两个表中共有的ID# Extract only those ID in common between the two tablesidx = rownames(otutab) %in% rownames(taxonomy) otutab = otutab[idx,] taxonomy = taxonomy[rownames(otutab),]# 使用amplicon包内置数据# data("metadata")# data(otutab)# 导入phyloseq(ps)对象ps = phyloseq(sample_data(metadata),otu_table(as.matrix(otutab), taxa_are_rows=TRUE), tax_table(as.matrix(taxonomy)))
# 样本间距离类型:Bray-Curtisdist = "bray"# phyloseq(ps)对象标准化ps1_rela = transform_sample_counts(ps, function(x) x / sum(x) )# 导出OTU表otu = as.data.frame(t(vegan_otu(ps1_rela)))# 预览otu[1:3,1:3]#计算距离矩阵unif = phyloseq::distance(ps1_rela , method=dist)# 聚类树,method默认为completehc <- hclust(unif, method = "complete")# 对树分组clus <- cutree(hc, 3)# 提取树中分组的标签和分组编号d = data.frame(label = names(clus), member = factor(clus))# 提取样本元数据map = as.data.frame(sample_data(ps))# 合并树信息到样本元数据dd = merge(d,map,by = "row.names",all = F) row.names(dd) = dd$Row.names dd$Row.names = NULL dd[1:3,1:3]# ggtree绘图 #----p = ggtree(hc) %<+% dd + geom_tippoint(size=5, shape=21, aes(fill=factor(Group), x=x)) + # geom_tiplab(aes(label=Group), size=3, hjust=.5) + geom_tiplab(aes(color = Group,x=x*1.2), hjust=1) # theme_dendrogram(plot.margin=margin(6,6,80,6))# 这是聚类图形的layoutp
# 指定物种组成的选项i = ps # 指定输入数据j = "Phylum" # 使用门水平绘制丰度图表rep = 6 # 重复数量是6个Top = 10 # 提取丰度前十的物种注释tran = TRUE # 转化为相对丰度值
# 按照分类学门(j)合并psdata = i %>% tax_glom(taxrank = j)# 转化丰度值if (tran == TRUE) { psdata = psdata%>% transform_sample_counts(function(x) {x/sum(x)} ) }#--提取otu和物种注释表格otu = otu_table(psdata)tax = tax_table(psdata) tax[1:3,1:7]#--按照指定的Top数量进行筛选与合并for (i in 1:dim(tax)[1]) { if (row.names(tax)[i] %in% names(sort(rowSums(otu), decreasing = TRUE)[1:Top])) { tax[i,j] =tax[i,j] } else { tax[i,j]= "Other" } } tax_table(psdata)= tax##转化为表格Taxonomies <- psdata %>% psmelt()# head(Taxonomies)Taxonomies$Abundance = Taxonomies$Abundance * 100
这里的格式也很简单,就是需要一列“id”,这里我们将样本名修改为id,即可
# colnames(Taxonomies)[1] = "id"Taxonomies$OTU = NULLcolnames(Taxonomies)[1] = "id"
因为我们颜色填充有好几种方式,所以需要对每种颜色填充保重独立性,使用ggnewscale,这也是Y叔的包。
library(ggnewscale)p <- p + new_scale_fill()p
p3 <- facet_plot(p, panel = 'Stacked Barplot', data = Taxonomies, geom = geom_barh,mapping = aes(x = Abundance, fill = as.factor(Phylum)),color = "black",stat='identity' ) p3
colbar <- dim(unique(dplyr::select(Taxonomies, one_of(j))))[1] colors = colorRampPalette(c("#CBD588", "#599861", "orange","#DA5724", "#508578", "#CD9BCD", "#AD6F3B", "#673770","#D14285", "#652926", "#C84248", "#8569D5", "#5E738F","#D1A33D", "#8A7C64","black"))(colbar) p3 + scale_fill_manual(values = colors)
修改layout,设置中空等。
p = ggtree(hc,layout="fan", branch.length = "none", ladderize = FALSE) %<+% dd + geom_tippoint(size=5, shape=21, aes(fill=factor(Group), x=x)) + geom_tiplab(aes(color = Group,x=x*1.2), hjust=1) p = p + xlim(-4,NA) p
如添加样品测序量柱状图、数值标签
p <- ggtree(hc) + theme_tree2() p
head(dd) dd$sequencenum = sample_sums(ps)dd data = data.frame(id = row.names(dd),sequencenum = dd$sequencenum ) head(data) # p3 #---------添加序列p2 <- facet_plot(p, panel = 'Number Barplot', data = dd , geom = geom_barh,mapping = aes(x = sequencenum ,fill = Group),stat='identity' ) p2
facet_plot(p2, panel='Stacked Barplot',data=dd, geom=geom_text, mapping=aes(x=sequencenum+20, label=sequencenum))
p3 <- facet_plot(p2, panel = 'Abundance Barplot', data = Taxonomies, geom = geom_barh,mapping = aes(x = Abundance, fill = as.factor(Phylum)),color = "black",stat='identity' ) p3
撰文:五谷杂粮
责编:刘永鑫 中科院遗传发育所
10000+: 菌群分析
宝宝与猫狗 提DNA发Nature 实验分析谁对结果影响大 Cell微生物专刊 肠道指挥大脑
为鼓励读者交流、快速解决科研困难,我们建立了“宏基因组”专业讨论群,目前己有国内外5000+ 一线科研人员加入。参与讨论,获得专业解答,欢迎分享此文至朋友圈,并扫码加主编好友带你入群,务必备注“姓名-单位-研究方向-职称/年级”。技术问题寻求帮助,首先阅读《如何优雅的提问》学习解决问题思路,仍末解决群内讨论,问题不私聊,帮助同行。
学习扩增子、宏基因组科研思路和分析实战,关注“宏基因组”
Archiver|手机版|科学网 ( 京ICP备07017567号-12 )
GMT+8, 2024-9-19 06:17
Powered by ScienceNet.cn
Copyright © 2007- 中国科学报社