# General methylation in promoters, enhancers and gene bodies - all datasets
library( ff )
## Loading required package: bit
## Attaching package bit
## package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)
## creators: bit bitwhich
## coercion: as.logical as.integer as.bit as.bitwhich which
## operator: ! & | xor != ==
## querying: print length any all min max range sum summary
## bit access: length<- [ [<- [[ [[<-
## for more help type ?bit
##
## Attaching package: 'bit'
## The following object is masked from 'package:base':
##
## xor
## Attaching package ff
## - getOption("fftempdir")=="C:/Users/jro049/AppData/Local/Temp/RtmpQLzjWr"
## - getOption("ffextension")=="ff"
## - getOption("ffdrop")==TRUE
## - getOption("fffinonexit")==TRUE
## - getOption("ffpagesize")==65536
## - getOption("ffcaching")=="mmnoflush" -- consider "ffeachflush" if your system stalls on large writes
## - getOption("ffbatchbytes")==1030771179.52 -- consider a different value for tuning your system
## - getOption("ffmaxbytes")==51538558976 -- consider a different value for tuning your system
##
## Attaching package: 'ff'
## The following objects are masked from 'package:bit':
##
## clone, clone.default, clone.list
## The following objects are masked from 'package:utils':
##
## write.csv, write.csv2
## The following objects are masked from 'package:base':
##
## is.factor, is.ordered
library( ggplot2 )
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library( dplyr )
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
source( "../common_variables.R" )
# gather all the data
betas.df.subgroup.all <- tibble( name = "A",
CpG = "cpg",
position = 1,
indiv = "i",
indiv.ctd = "i2",
beta = 0.1,
mvalue = 0.1,
ensembl_reg_feat = "reg",
CGI = FALSE )
for( chosen.cleft in cleft.types ){
load( paste0( "annotation_overview_chosenSNPregions_",
file.names.root[ chosen.cleft ], ".RData" ) )
betas.df.subgroup.all <- union( betas.df.subgroup.all, betas.df.subgroup.new.reg.feat )
}
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
betas.df.subgroup.all <- betas.df.subgroup.all[ -1, ]
# there are so many different categories of CpGs:
unique( betas.df.subgroup.all$ensembl_reg_feat )
## [1] "gene" "enhancer-gene" "enhancer"
## [4] "promoter-gene" "promoter_flank-gene" "promoter"
## [7] "promoter_flank" "none"
# create grouping for plotting purposes:
# 1. promoter vs. other
betas.df.subgroup.all$is.promoter <- FALSE
any.promoter <- grepl( pattern = "promoter",
betas.df.subgroup.all$ensembl_reg_feat, fixed = TRUE )
betas.df.subgroup.all$is.promoter <- any.promoter
table( betas.df.subgroup.all$is.promoter )
##
## FALSE TRUE
## 161755 83905
# plot
ggplot( betas.df.subgroup.all, aes( is.promoter, beta ) ) +
# geom_boxplot( varwidth = TRUE ) +
geom_violin() +
labs( x = "CpG within promoter", y = "DNA methylation (beta)" ) +
theme( axis.title = element_text( size = rel( 2 ) ),
axis.text = element_text( size = rel( 1.5 ) ) )

ggsave( filename = "gen_meth_promoter-beta.jpg" )
## Saving 7 x 5 in image
# 2. the enhancer regions:
any.enhancer <- grepl( pattern = "enhancer",
betas.df.subgroup.all$ensembl_reg_feat, fixed = TRUE )
betas.df.subgroup.all$is.enhancer <- any.enhancer
# plot
ggplot( betas.df.subgroup.all, aes( is.enhancer, beta ) ) +
# geom_boxplot( varwidth = TRUE ) +
geom_violin() +
labs( x = "CpG within enhancer", y = "DNA methylation (beta)" ) +
theme( axis.title = element_text( size = rel( 2 ) ),
axis.text = element_text( size = rel( 1.5 ) ) )

ggsave( filename = "gen_meth_enhancer-beta.jpg" )
## Saving 7 x 5 in image
# 3. gene body regions:
any.gene <- grepl( pattern = "gene",
betas.df.subgroup.all$ensembl_reg_feat, fixed = TRUE )
betas.df.subgroup.all$is.gene <- any.gene
# plot
ggplot( betas.df.subgroup.all, aes( is.gene, beta ) ) +
# geom_boxplot( varwidth = TRUE ) +
geom_violin() +
labs( x = "CpG within gene", y = "DNA methylation (beta)" ) +
theme( axis.title = element_text( size = rel( 2 ) ),
axis.text = element_text( size = rel( 1.5 ) ) )

ggsave( filename = "gen_meth_gene-beta.jpg" )
## Saving 7 x 5 in image