生物信息学

20210318_WGCNA_加权基因共表达网络分析

Song Wei 2023年3月12日 05:43

305

20210318_印楝_WGCNA分析

（1）WGCNA (加权基因共表达网络分析) 是一种系统生物学方法，用于分析高通量基因表达数据。该方法可以识别共表达的基因集，这些基因集具有高度相关性，通常与生物学过程、细胞类型和疾病状态相关联。

（2）WGCNA 通过构建基因共表达网络来实现这一点，其中节点表示基因，边表示它们之间的相关性。然后，将网络分成不同的模块（也称为基因集），其中每个模块代表一个共同表达的基因集。这些模块可以与临床特征相关联，从而揭示潜在的生物学机制和治疗靶点。

（3）WGCNA 已被广泛应用于许多生物学领域，如癌症研究、神经科学和植物基因组学等。它是一种有力的工具，可用于研究基因调控网络和发现与疾病相关的生物标志物。

WGCNA分析需要两个文件：

（1）基因表达矩阵：genes.TMM.EXPR.matrix

（2）样品分组以及表型信息：sample_information.txt

library(WGCNA)
library(tidyverse)
gene_exp <- read.table(file = './genes.TMM.EXPR.matrix',row.names = 1)
head(gene_exp) 
#view(gene_exp)

sample_info <- read.table(file = './sample_information.txt',row.names = 1)
head(sample_info)
#view(sample_info)

# 转置
datExpr0 <- t(gene_exp)
head(datExpr0)

# 缺失数据及无波动数据过滤
gsg <- goodSamplesGenes(
  datExpr0, 
  minFraction = 1/2 #基因的缺失数据比例阈值
)
datExpr <- datExpr0[gsg$goodSamples, gsg$goodGenes]
dim(datExpr)
datExpr[1:6, 1:3]

# 通过聚类，查看是否有明显异常样本, 如果有需要剔除掉
plot(hclust(dist(datExpr)), 
     cex.lab = 1.5,
     cex.axis = 1.5, 
     cex.main = 2)

datTraits <- sample_info
datTraits
datTraits[1:6, 1:3]   #这个表格的样品顺序必须和上面的datExpr[1:6, 1:3]数据顺序相同

library(WGCNA)
# 多线程
enableWGCNAThreads(nThreads = 32)


# 通过对 power 的多次迭代，确定最佳 power
sft <- pickSoftThreshold(
  datExpr, 
  powerVector = 1:40, # 尝试 1 到 20
  networkType = "unsigned" 
)
head(sft)
save(sft,file="sft.rdata")
load(file = "./sft.rdata")


# 画图
library(ggplot2)
library(ggrepel)
library(cowplot)
library(ggthemes)
fig_power1 <- ggplot(data = sft$fitIndices,
                     aes(x = Power,
                         y = SFT.R.sq)) +
  geom_point(color = 'red') +
  geom_text_repel(aes(label = Power)) +
  geom_hline(aes(yintercept = 0.85), color = 'red') +
  labs(title = 'Scale independence',
       x = 'Soft Threshold (power)',
       y = 'Scale Free Topology Model Fit,signed R^2') +
  theme_few() +
  theme(plot.title = element_text(hjust = 0.5))


fig_power2 <- ggplot(data = sft$fitIndices,
                     aes(x = Power,
                         y = mean.k.)) +
  geom_point(color = 'red') +
  geom_text_repel(aes(label = Power)) +
  labs(title = 'Mean connectivity',
       x = 'Soft Threshold (power)',
       y = 'Mean Connectivity') +
  theme_few()+
  theme(plot.title = element_text(hjust = 0.5))
plot_grid(fig_power1, fig_power2)
p2 <- plot_grid(fig_power1, fig_power2)
ggsave("p2.pdf", p2, width = 9, height = 6, dpi=300)






##########################################################################
net <- blockwiseModules(
  # 0.输入数据
  datExpr, 
  
  # 1. 计算相关系数
  corType = "pearson", # 相关系数算法，pearson|bicor
  
  # 2. 计算邻接矩阵
  power = 19, # 前面得到的 soft power
  networkType = "unsigned", # unsigned | signed | signed hybrid
  
  # 3. 计算 TOM 矩阵
  TOMType = "unsigned", # none | unsigned | signed
  saveTOMs = TRUE,
  saveTOMFileBase = "blockwiseTOM",
  
  # 4. 聚类并划分模块
  deepSplit = 2, # 0|1|2|3|4, 值越大得到的模块就越多越小
  minModuleSize = 30,
  
  # 5. 合并相似模块
  ## 5.1 计算模块特征向量（module eigengenes， MEs），即 PC1
  ## 5.2 计算 MEs 与 datTrait 之间的相关性
  ## 5.3 对距离小于 mergeCutHeight （1-cor）的模块进行合并
  mergeCutHeight = 0.25, 
  
  # 其他参数
  numericLabels = FALSE, # 以数字命名模块
  nThreads = 0, # 0 则使用所有可用线程
  maxBlockSize = 100000 # 需要大于基因的数目
)
# 查看每个模块包含基因数目
table(net$colors) 

save(net,file="net.rdata")
load(file = "./net.rdata")

#$$$$
net3 <- blockwiseModules(
  # 0.输入数据
  datExpr, 
  
  # 1. 计算相关系数
  corType = "pearson", # 相关系数算法，pearson|bicor
  
  # 2. 计算邻接矩阵
  power = 19, # 前面得到的 soft power
  networkType = "unsigned", # unsigned | signed | signed hybrid
  
  # 3. 计算 TOM 矩阵
  TOMType = "unsigned", # none | unsigned | signed
  saveTOMs = TRUE,
  saveTOMFileBase = "blockwiseTOM",
  
  # 4. 聚类并划分模块
  deepSplit = 3, # 0|1|2|3|4, 值越大得到的模块就越多越小
  minModuleSize = 30,
  
  # 5. 合并相似模块
  ## 5.1 计算模块特征向量（module eigengenes， MEs），即 PC1
  ## 5.2 计算 MEs 与 datTrait 之间的相关性
  ## 5.3 对距离小于 mergeCutHeight （1-cor）的模块进行合并
  mergeCutHeight = 0.25, 
  
  # 其他参数
  numericLabels = FALSE, # 以数字命名模块
  nThreads = 0, # 0 则使用所有可用线程
  maxBlockSize = 100000 # 需要大于基因的数目
)
# 查看每个模块包含基因数目
table(net3$colors) 

save(net3,file="net3.rdata")
load(file = "./net3.rdata")



#$$$$$$
net4 <- blockwiseModules(
  # 0.输入数据
  datExpr, 
  
  # 1. 计算相关系数
  corType = "pearson", # 相关系数算法，pearson|bicor
  
  # 2. 计算邻接矩阵
  power = 19, # 前面得到的 soft power
  networkType = "unsigned", # unsigned | signed | signed hybrid
  
  # 3. 计算 TOM 矩阵
  TOMType = "unsigned", # none | unsigned | signed
  saveTOMs = TRUE,
  saveTOMFileBase = "blockwiseTOM",
  
  # 4. 聚类并划分模块
  deepSplit = 4, # 0|1|2|3|4, 值越大得到的模块就越多越小
  minModuleSize = 30,
  
  # 5. 合并相似模块
  ## 5.1 计算模块特征向量（module eigengenes， MEs），即 PC1
  ## 5.2 计算 MEs 与 datTrait 之间的相关性
  ## 5.3 对距离小于 mergeCutHeight （1-cor）的模块进行合并
  mergeCutHeight = 0.25, 
  
  # 其他参数
  numericLabels = FALSE, # 以数字命名模块
  nThreads = 0, # 0 则使用所有可用线程
  maxBlockSize = 100000 # 需要大于基因的数目
)
# 查看每个模块包含基因数目
table(net4$colors) 

save(net4,file="net4.rdata")
load(file = "./net4.rdata")
##################################################
library(tidyverse)
wgcna_result <- data.frame(gene_id = names(net$colors),
                           module = net$colors)
dim(wgcna_result)


plotDendroAndColors(
  dendro = net$dendrograms[[1]], 
  colors = net$colors,
  groupLabels = "Module colors",
  dendroLabels = FALSE, 
  addGuide = TRUE)

p4 <- plotDendroAndColors(
  dendro = net$dendrograms[[1]], 
  colors = net$colors,
  groupLabels = "Module colors",
  dendroLabels = FALSE, 
  addGuide = TRUE)
p4
ggsave("p4.pdf", p4, width = 20, height = 10, dpi=300)



moduleTraitCor <- cor(
  net$MEs,
  datTraits,
  use = "p",
  method = 'pearson' # 注意相关系数计算方式
)
# 计算 Pvalue
moduleTraitPvalue <- corPvalueStudent(
  moduleTraitCor, 
  nrow(datExpr))

# 相关性 heatmap
sizeGrWindow(10,6)
# 连接相关性和 pvalue
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
                    signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) <- dim(moduleTraitCor)

# heatmap 画图
par(mar = c(6, 8.5, 3, 3))
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = names(datTraits),
               yLabels = names(net$MEs),
               ySymbols = names(net$MEs),
               colorLabels = FALSE,
               colors = greenWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.5,
               zlim = c(-1,1),
               main = paste("Module-trait relationships"))
dim(moduleTraitCor)
head(moduleTraitCor)

MET2 <- net$MEs
MET <- orderMEs(cbind(net$MEs, dplyr::select(datTraits, Azadirachtin)))
head(MET)
plotEigengeneNetworks(
  multiME = MET, 
  setLabels = "Eigengene dendrogram", 
  plotDendrograms = TRUE, 
  plotHeatmaps = FALSE,
  colorLabels = TRUE,
  marHeatmap = c(3,4,2,2))

plotEigengeneNetworks(
  multiME = MET2, 
  setLabels = "Eigengene dendrogram", 
  plotDendrograms = FALSE, 
  plotHeatmaps = TRUE,
  colorLabels = TRUE,
  marHeatmap = c(8,8,2,2))
head(MET)


# 需要导出的模块
my_modules <- c('yellow') 
# 提取该模块的表达矩阵
filter(wgcna_result, module == my_modules)
m_wgcna_result <- filter(wgcna_result, module %in% my_modules)
head(m_wgcna_result)
m_wgcna_result
m_datExpr <- datExpr[, m_wgcna_result$gene_id]
# 计算该模块的 TOM 矩阵  
m_TOM <- TOMsimilarityFromExpr(
  m_datExpr,
  power = 6,
  networkType = "unsigned",
  TOMType = "unsigned")
head(m_TOM)
dimnames(m_TOM) <- list(colnames(m_datExpr), colnames(m_datExpr))



#####################################################################
library(dplyr)
#BiocManager::install("vegan")
library(vegan)
library(ggplot2)
#devtools::install_git("https://gitee.com/dr_yingli/ggcor") 
library(ggcor)
packageVersion('ggcor')
#view(MET2)
head(MET2)
#view(sample_info)
head(sample_info)

quickcor(MET2) + geom_square()
quickcor(MET2, type = "upper") + geom_circle2()
quickcor(MET2, type = "full", cor.test = TRUE) + geom_circle2()
quickcor(MET2, type = "upper", cor.test = TRUE) + geom_circle2()


corr <- fortify_cor(MET2, type = "upper", show.diag = TRUE,
                    cor.test = TRUE, cluster.type = "all")

mantel <- mantel_test( sample_info,MET2,
                       spec.select = list(spec01 = 1,
                                          spec02 = 2,
                                          spec03 = 3),mantel.fun = "mantel.randtest")
mantel2 <- mantel_test(sample_info,MET2,
                      spec.select = list(Spec01 = 1,
                                         Spec02 = 2,
                                         Spec03 = 3)) %>% 
  mutate(rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf),
                  labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")),
         pd = cut(p.value, breaks = c(-Inf, 0.01, 0.05, Inf),
                  labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))
head(mantel2)


mantel3 <- mantel_test(sample_info,MET2,
                       spec.select = list(Azadirachtin = 1,
                                          Nimbin = 2,
                                          Salanin = 3)) %>% 
  mutate(rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf),
                  labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")),
         pd = cut(p.value, breaks = c(-Inf, 0.001,0.01, 0.05, Inf),
                  labels = c("< 0.001","0.001-0.01", "0.01 - 0.05", ">= 0.05")))
view(mantel3)

library(ggcor)

quickcor(corr, type = "upper") +
  geom_square() +
  anno_link(aes(colour = pd, size = rd), data = mantel2) +
  scale_size_manual(values = c(0.5, 1, 2))+
  guides(size = guide_legend(title = "Mantel's r",
                             order = 2),
         colour = guide_legend(title = "Mantel's p", 
                               order = 1),
         fill = guide_colorbar(title = "Pearson's r", order = 3))
quickcor(corr, type = "upper") +
  geom_circle2() +
  anno_link(aes(colour = pd, size = rd), data = mantel3) +
  scale_size_manual(values = c(0.5, 1, 2))+
  guides(size = guide_legend(title = "Mantel's r",
                             order = 2),
         colour = guide_legend(title = "Mantel's p", 
                               order = 1),
         fill = guide_colorbar(title = "Pearson's r", order = 3))
quickcor(corr,type="upper") + geom_circle2(scale_fill_manual(values = c("red","gray","green")))

quickcor(corr, type = "upper") +
  geom_circle2(colour = "black") +
  anno_link(aes(colour = rd, size = pd), data = mantel3) +
  scale_size_manual(values = c(0.5, 1, 2))+
  guides(size = guide_legend(title = "Mantel's p",
                             order = 2),
         colour = guide_legend(title = "Mantel's r", 
                               order = 1),
         fill = guide_colorbar(title = "Pearson's r", order = 3))+
  scale_size_manual(values = c(2, 0.5))+
  scale_colour_manual(values = c("#A2A2A288","#D95F02", "#1B9E77")) +
  scale_fill_gradient2(midpoint = 0, low = "#A91529", mid = "white",high = "#053061", space = "Lab" )



################################################################################
quickcor(corr, type = "upper") +
  geom_circle2(colour = "black") +
  anno_link(aes(colour = rd, size = pd), data = mantel3) +
  scale_size_manual(values = c(0.5, 1, 2))+
  guides(size = guide_legend(title = "Mantel's p",
                             order = 2),
         colour = guide_legend(title = "Mantel's r", 
                               order = 1),
         fill = guide_colorbar(title = "Pearson's r", order = 3))+
  scale_size_manual(values = c(2, 0.5))+
  scale_colour_manual(values = c("#A2A2A288","#D95F02", "#1B9E77")) 



#测试数据

library(tidyverse)
library(ggcor)
data("varechem", package = "vegan")
view(varechem)
dim(varechem)
data("varespec", package = "vegan")
view(varespec)
dim(varespec)
mantel <- mantel_test(varespec, varechem,
                      spec.select = list(Spec01 = 1:7,
                                         Spec02 = 8:18,
                                         Spec03 = 19:37,
                                         Spec04 = 38:44)) %>% 
  mutate(rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf),
                  labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")),
         pd = cut(p.value, breaks = c(-Inf, 0.01, 0.05, Inf),
                  labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))
head(mantel)

quickcor(varechem, type = "upper") +
  geom_square() +
  anno_link(aes(colour = pd, size = rd), data = mantel) +
  scale_size_manual(values = c(0.5, 1, 2))+
  guides(size = guide_legend(title = "Mantel's r",
                             order = 2),
         colour = guide_legend(title = "Mantel's p", 
                               order = 1),
         fill = guide_colorbar(title = "Pearson's r", order = 3))

##########################################################################
#KEGG富集分析
# 需要导出的模块
my_modules <- c('brown') 
# 提取该模块的表达矩阵
filter(wgcna_result, module == my_modules)
mRNA_wgcna <- filter(wgcna_result, module %in% my_modules)%>% pull(gene_id)
view(mRNA_wgcna)
dim(mRNA_wgcna)
class(mRNA_wgcna)

emapper <- read_delim('./3_canu_genome.eggnog.mapper.result.emapper.annotations', 
                      "\t", escape_double = FALSE, col_names = FALSE, 
                      comment = "#", trim_ws = TRUE) %>%
  dplyr::select(GID = X1, 
                KO = X9, 
                Pathway = X10)
view(emapper)

pathway2gene <- dplyr::select(emapper, Pathway, GID) %>%
  separate_rows(Pathway, sep = ',', convert = F) %>%
  filter(str_detect(Pathway, 'ko')) %>%
  mutate(Pathway = str_remove(Pathway, 'ko'))
pathway2gene
view(pathway2gene)



library(magrittr)
get_path2name <- function(){
  keggpathid2name.df <- clusterProfiler:::kegg_list("pathway")
  keggpathid2name.df[,1] %<>% gsub("path:map", "", .)
  colnames(keggpathid2name.df) <- c("path_id","path_name")
  return(keggpathid2name.df)
}
pathway2name <- get_path2name()



library(magrittr)
library(clusterProfiler)
get_path2name <- function(){
  keggpathid2name.df <- clusterProfiler:::kegg_list("pathway")
  keggpathid2name.df[,1] %<>% gsub("path:map", "", .)
  colnames(keggpathid2name.df) <- c("path_id","path_name")
  return(keggpathid2name.df)
}
pathway2name <- get_path2name()
pathway2name



library(clusterProfiler)
de_ekp <- enricher(mRNA_wgcna,
                   TERM2GENE = pathway2gene,
                   TERM2NAME = pathway2name,
                   pvalueCutoff = 0.05,
                   qvalueCutoff = 0.05)
de_ekp
class(de_ekp)
de_ekp_df <- as.data.frame(de_ekp)
head(de_ekp_df)

barplot(de_ekp,showCategory = 10)
dotplot(de_ekp)
cnetplot(de_ekp,showCategory = 10,node_label = "gene",#category | gene|all|none
         circular=TRUE,colorEdge=TRUE)
?cnetplot
cnetplot(de_ekp,showCategory = 10,node_label = "gene",#category | gene|all|none
         circular=TRUE,colorEdge=TRUE,cex_label_gene = 0.5,cex_label_category=0.8)



##########################################################################
#GO富集分析
library(argparser)
library(AnnotationForge)
library(AnnotationForge)
library(GO.db)
dir.create('R_Library', recursive = T)
#(R_env_1) dell@dell-server:~/桌面/kegg_go_plot_result/kegg_plot$ Rscript  emcp/create_orgdb_from_emapper.R  3_canu_genome.eggnog.mapper.result.emapper.annotations
#dir.create('R_Library', recursive = T)
# 将包安装在该文件夹下
install.packages('./org.My.eg.db_1.0.tar.gz',repos = NULL, #从本地安装
                 lib = 'R_Library') # 安装文件夹
# 加载 OrgDB
library(org.My.eg.db, lib = 'R_Library')


# 需要导出的模块
my_modules <- c('brown') 
# 提取该模块的表达矩阵
filter(wgcna_result, module == my_modules)
mRNA_wgcna <- filter(wgcna_result, module %in% my_modules)%>% pull(gene_id)
view(mRNA_wgcna)
dim(mRNA_wgcna)
class(mRNA_wgcna)



de_ego <- enrichGO(gene = mRNA_wgcna,
                   OrgDb = org.My.eg.db,
                   keyType = 'GID',
                   ont = 'ALL',
                   qvalueCutoff = 0.05,
                   pvalueCutoff = 0.05)

de_ego_df <- as.data.frame(de_ego)
head(de_ego_df)
#$$
barplot(de_ego, showCategory = 10, split="ONTOLOGY") + 
  facet_grid(ONTOLOGY~., scale="free") + scale_y_discrete(labels=function(y) stringr::str_wrap(y,width=36))


dotplot(de_ego, showCategory = 10, split="ONTOLOGY",font.size = 8) +   
  facet_grid(ONTOLOGY~., scale="free")+ scale_y_discrete(labels=function(y) stringr::str_wrap(y,width=36))

p2 <- cnetplot(de_ego,showCategory = 5,node_label = "gene",#category | gene|all|none
               circular=TRUE,colorEdge=TRUE,cex_label_gene = 0.5,cex_label_category=0.8,cex_gene=0.6,cex_category = 0.6)+ scale_y_discrete(labels=function(y) stringr::str_wrap(y,width=36))



p4 <- plot_grid(p1,p2,labels = "Chrom14 GO pathway enrichment",label_size = 14,label_fontface = "bold",label_x = -0.2,label_y = 1) 
ggsave("chrom14_GO.pdf", p4, width = 13, height = 6, dpi=300)