dplyr的使用以及clusterProfiler使用

以后每周整理一次技术方面的内容。
dplyr包的使用
用于整理数据
用到的函数:

  1. mutate()
  2. select()
  3. gather() 将宽数据格式变为长数据格式
# Process the CpG region file: compute one pooled ratio per sample group
# (W = samples 1-4, S = samples 5-8), plot both distributions, and export
# the per-region table.
library(dplyr)
library(tidyr)
library(ggplot2) # needed for ggplot(), geom_violin() and geom_boxplot() below

# NOTE(review): hard-coded, machine-specific working directory; adjust it
# before running on another machine.
setwd("C:/Users/Administrator/Desktop/")

df <- read.table("CpG_regions.txt", header = TRUE)

df1 <- df %>%
  # Pooled ratio per group: summed numCs divided by summed coverage
  # (presumably the methylation level of each region -- confirm against the
  # tool that produced CpG_regions.txt).
  mutate(
    W = (numCs1 + numCs2 + numCs3 + numCs4) /
      (coverage1 + coverage2 + coverage3 + coverage4),
    S = (numCs5 + numCs6 + numCs7 + numCs8) /
      (coverage5 + coverage6 + coverage7 + coverage8)
  ) %>%
  # Drop every column from strand through numTs8; only the columns outside
  # that range and the new W / S columns remain.
  select(-(strand:numTs8))

# Reshape wide -> long: the W and S columns become a key column `samples`
# and a value column `level`. gather() is superseded in tidyr;
# pivot_longer() is the modern replacement.
df1_long <- gather(df1, samples, level, W:S)

# Violin plot with a narrow box plot overlaid, coloured by sample group.
ggplot(df1_long, aes(samples, level)) +
  geom_violin(aes(color = samples)) +
  geom_boxplot(width = 0.2, aes(color = samples))

write.csv(df1, "cpg.csv", row.names = FALSE, quote = FALSE)

# TODO: use a paired two-tailed t-test to check whether the groups differ.

# gather() reference:
# https://www.cnblogs.com/shangfr/p/6110614.html

clusterProfiler包的使用

# GO and KEGG enrichment of a gene list with clusterProfiler.
# NOTE(review): hard-coded, machine-specific working directory; adjust it
# before running on another machine.
setwd("C:/Users/Administrator/Desktop/")

# NOTE(review): org.Atair10.eg.db looks like a locally built OrgDb package
# rather than a standard Bioconductor one -- confirm it is installed.
library(org.Atair10.eg.db)
library(clusterProfiler)

# Read the gene table and keep its first column as a character vector.
DEG_GENES <- read.table("up_yin.txt", header = TRUE)
DEG_GENES <- as.character(DEG_GENES[, 1])

org <- org.Atair10.eg.db

# The OrgDb has to be changed to the one for your organism, and keyType has
# to be set to "GID".
ego_up <- enrichGO(
  gene = DEG_GENES,
  OrgDb = org,
  keyType = "GID",
  ont = "BP"
)
dotplot(ego_up, showCategory = 20)
write.csv(ego_up, "ego_up.csv", row.names = FALSE)

# This result is the same as the earlier one. Organism codes are listed at
# http://www.genome.jp/kegg/catalog/org_list.html
kegg_up <- enrichKEGG(DEG_GENES, organism = "ath", pvalueCutoff = 0.1)
write.csv(kegg_up, "kegg_up.csv", row.names = FALSE)