# General methylation in promoters, enhancers and gene bodies - all datasets
library( ff )
## Loading required package: bit
## Attaching package bit
## package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)
## creators: bit bitwhich
## coercion: as.logical as.integer as.bit as.bitwhich which
## operator: ! & | xor != ==
## querying: print length any all min max range sum summary
## bit access: length<- [ [<- [[ [[<-
## for more help type ?bit
## 
## Attaching package: 'bit'
## The following object is masked from 'package:base':
## 
##     xor
## Attaching package ff
## - getOption("fftempdir")=="C:/Users/jro049/AppData/Local/Temp/RtmpQLzjWr"
## - getOption("ffextension")=="ff"
## - getOption("ffdrop")==TRUE
## - getOption("fffinonexit")==TRUE
## - getOption("ffpagesize")==65536
## - getOption("ffcaching")=="mmnoflush"  -- consider "ffeachflush" if your system stalls on large writes
## - getOption("ffbatchbytes")==1030771179.52 -- consider a different value for tuning your system
## - getOption("ffmaxbytes")==51538558976 -- consider a different value for tuning your system
## 
## Attaching package: 'ff'
## The following objects are masked from 'package:bit':
## 
##     clone, clone.default, clone.list
## The following objects are masked from 'package:utils':
## 
##     write.csv, write.csv2
## The following objects are masked from 'package:base':
## 
##     is.factor, is.ordered
library( ggplot2 )
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library( dplyr )
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
source( "../common_variables.R" )

# gather all the data
betas.df.subgroup.all <- tibble( name = "A",
                                 CpG = "cpg",
                                 position = 1,
                                 indiv = "i",
                                 indiv.ctd = "i2",
                                 beta = 0.1,
                                 mvalue = 0.1,
                                 ensembl_reg_feat = "reg",
                                 CGI = FALSE )
for( chosen.cleft in cleft.types ){
  load( paste0( "annotation_overview_chosenSNPregions_",
                        file.names.root[ chosen.cleft ], ".RData" ) )
  betas.df.subgroup.all <- union( betas.df.subgroup.all, betas.df.subgroup.new.reg.feat )
}
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector

## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector

## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector

## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector

## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
betas.df.subgroup.all <- betas.df.subgroup.all[ -1, ]
# there are so many different categories of CpGs:
unique( betas.df.subgroup.all$ensembl_reg_feat )
## [1] "gene"                "enhancer-gene"       "enhancer"           
## [4] "promoter-gene"       "promoter_flank-gene" "promoter"           
## [7] "promoter_flank"      "none"
# create grouping for plotting purposes:
# 1. promoter vs. other

betas.df.subgroup.all$is.promoter <- FALSE
any.promoter <- grepl( pattern = "promoter", 
                       betas.df.subgroup.all$ensembl_reg_feat, fixed = TRUE )

betas.df.subgroup.all$is.promoter <- any.promoter
table( betas.df.subgroup.all$is.promoter )
## 
##  FALSE   TRUE 
## 161755  83905
# plot
ggplot( betas.df.subgroup.all, aes( is.promoter, beta ) ) +
  # geom_boxplot( varwidth = TRUE ) +
  geom_violin() +
    labs( x = "CpG within promoter", y = "DNA methylation (beta)" ) +
    theme( axis.title = element_text( size = rel( 2 ) ),
            axis.text = element_text( size = rel( 1.5 ) ) )

ggsave( filename = "gen_meth_promoter-beta.jpg" )
## Saving 7 x 5 in image
# 2. the enhancer regions:
any.enhancer <- grepl( pattern = "enhancer", 
                       betas.df.subgroup.all$ensembl_reg_feat, fixed = TRUE )

betas.df.subgroup.all$is.enhancer <- any.enhancer

# plot
ggplot( betas.df.subgroup.all, aes( is.enhancer, beta ) ) +
  # geom_boxplot( varwidth = TRUE ) +
  geom_violin() +
    labs( x = "CpG within enhancer", y = "DNA methylation (beta)" ) +
    theme( axis.title = element_text( size = rel( 2 ) ),
            axis.text = element_text( size = rel( 1.5 ) ) )

ggsave( filename = "gen_meth_enhancer-beta.jpg" )
## Saving 7 x 5 in image
# 3. gene body regions:
any.gene <- grepl( pattern = "gene", 
                       betas.df.subgroup.all$ensembl_reg_feat, fixed = TRUE )

betas.df.subgroup.all$is.gene <- any.gene

# plot
ggplot( betas.df.subgroup.all, aes( is.gene, beta ) ) +
  # geom_boxplot( varwidth = TRUE ) +
  geom_violin() +
    labs( x = "CpG within gene", y = "DNA methylation (beta)" ) +
    theme( axis.title = element_text( size = rel( 2 ) ),
            axis.text = element_text( size = rel( 1.5 ) ) )

ggsave( filename = "gen_meth_gene-beta.jpg" )
## Saving 7 x 5 in image