# General methylation in promoters, enhancers and gene bodies - all datasets
source( "../common_variables.R" )

# gather all the data
# Start from a zero-row template that fixes the column names and types the
# merged table is expected to have; each per-cleft dataset is then merged in
# with union(), which also drops duplicated rows.
betas.df.subgroup.all <- tibble( name = character(),
                                 CpG = character(),
                                 position = numeric(),
                                 indiv = character(),
                                 indiv.ctd = character(),
                                 beta = numeric(),
                                 mvalue = numeric(),
                                 ensembl_reg_feat = character(),
                                 CGI = logical() )
for( chosen.cleft in cleft.types ){
  data.file <- paste0( "annotation_overview_chosenSNPregions_",
                       file.names.root[ chosen.cleft ], ".RData" )
  # load() returns the names of the objects it restored; use that to make sure
  # this file really provided the table, otherwise a copy left over from the
  # previous iteration would be merged again without any warning
  loaded.names <- load( data.file )
  if( !( "betas.df.subgroup.new.reg.feat" %in% loaded.names ) ){
    stop( "Object 'betas.df.subgroup.new.reg.feat' not found in ", data.file,
          call. = FALSE )
  }
  betas.df.subgroup.all <- union( betas.df.subgroup.all,
                                  betas.df.subgroup.new.reg.feat )
}
# there are so many different categories of CpGs:
unique( betas.df.subgroup.all$ensembl_reg_feat )
# recorded output of the call above:
## [1] "gene"                "enhancer-gene"       "enhancer"           
## [4] "promoter-gene"       "promoter_flank-gene" "promoter"           
## [7] "promoter_flank"      "none"
# create grouping for plotting purposes:
# 1. promoter vs. other
# Literal substring match (fixed = TRUE): every category whose label contains
# "promoter" is flagged, i.e. also "promoter-gene", "promoter_flank" and
# "promoter_flank-gene".
any.promoter <- grepl( pattern = "promoter",
                       betas.df.subgroup.all$ensembl_reg_feat, fixed = TRUE )

betas.df.subgroup.all$is.promoter <- any.promoter
table( betas.df.subgroup.all$is.promoter )
# recorded counts, in the order table() prints them (FALSE, then TRUE):
## 161755  83905
# plot: distribution of beta values for CpGs outside vs. inside promoters
ggplot( betas.df.subgroup.all, aes( is.promoter, beta ) ) +
  # geom_boxplot( varwidth = TRUE ) +
  geom_violin() +
    labs( x = "CpG within promoter", y = "DNA methylation (beta)" ) +
    theme( axis.title = element_text( size = rel( 2 ) ),
            axis.text = element_text( size = rel( 1.5 ) ) )

# save the plot that was just built (ggplot2 keeps track of the last plot)
ggsave( filename = "gen_meth_promoter-beta.jpg", plot = last_plot() )
# 2. enhancer vs. other
# Literal substring match: any category whose label contains "enhancer" is
# flagged, so "enhancer-gene" counts as enhancer as well.
any.enhancer <- grepl( "enhancer",
                       x = betas.df.subgroup.all$ensembl_reg_feat,
                       fixed = TRUE )
betas.df.subgroup.all$is.enhancer <- any.enhancer

# violin plot of beta values, split by the enhancer flag
# (alternative kept for reference: geom_boxplot( varwidth = TRUE ))
ggplot( data = betas.df.subgroup.all,
        mapping = aes( x = is.enhancer, y = beta ) ) +
  geom_violin() +
  xlab( "CpG within enhancer" ) +
  ylab( "DNA methylation (beta)" ) +
  theme( axis.text = element_text( size = rel( 1.5 ) ),
         axis.title = element_text( size = rel( 2 ) ) )

# write the most recently built plot to disk
ggsave( filename = "gen_meth_enhancer-beta.jpg", plot = last_plot() )
# 3. gene body vs. other
# Literal substring match: any category whose label contains "gene" is
# flagged, which includes the combined labels such as "enhancer-gene",
# "promoter-gene" and "promoter_flank-gene".
any.gene <- grepl( "gene",
                   x = betas.df.subgroup.all$ensembl_reg_feat,
                   fixed = TRUE )
betas.df.subgroup.all$is.gene <- any.gene

# violin plot of beta values, split by the gene flag
# (alternative kept for reference: geom_boxplot( varwidth = TRUE ))
ggplot( data = betas.df.subgroup.all,
        mapping = aes( x = is.gene, y = beta ) ) +
  geom_violin() +
  xlab( "CpG within gene" ) +
  ylab( "DNA methylation (beta)" ) +
  theme( axis.text = element_text( size = rel( 1.5 ) ),
         axis.title = element_text( size = rel( 2 ) ) )

# write the most recently built plot to disk
ggsave( filename = "gen_meth_gene-beta.jpg", plot = last_plot() )
