# Creating a nice overview of the annotations within the 50kbp regions of the chosen SNPs
# using the entire dataset! (i.e., not splitting into cleft-types!)
# setwd( "PLOTTING" )
load( "../new_betas_df_subgroup_gene.RData" )
load( "../new_betas_matrices_ensembl_promoter_enhancer.RData" )
load( "../newest_betas_prepared.RData" )
cpg.islands.all <- read.table( "../../ORIG_DATA/CpG_islands_hg19.dat", header = TRUE, sep = "\t" )
cpg.info <- read.csv( "../../ORIG_DATA/cpg_info_all.csv" )
cpg.info$TargetID <- as.character( cpg.info$TargetID )
rownames( cpg.info ) <- cpg.info$TargetID
cpg.info <- cpg.info[ !is.na(cpg.info$MAPINFO), ]
dim( betas.df.subgroup.gene.list[[ 1 ]] )
## [1] 16171 9
source( "clean_betas_annotation.R" )
source( "../common_variables.R" )
library( ggplot2 )
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library( dplyr )
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# col.names <- c( "name","CpG","position","indiv","indiv.ctd",
# "beta","mvalue","ensembl_reg_feat","CGI" )
betas.df.subgroup.all <- tibble( name = "A",
CpG = "cpg",
position = 1,
indiv = "i",
indiv.ctd = "i2",
beta = 0.1,
mvalue = 0.1,
ensembl_reg_feat = "reg",
CGI = FALSE )
for( chosen.cleft in cleft.types ){
file.out <- paste0( "annotation_overview_chosenSNPregions_",
file.names.root[ chosen.cleft ], ".RData" )
clean.betas.annotation( betas.df.subgroup.gene = betas.df.subgroup.gene.list[[ chosen.cleft ]],
betas.df.subgroup.promoter = betas.df.subgroup.promoter.list[[ chosen.cleft ]],
betas.df.subgroup.enhancer = betas.df.subgroup.enhancer.list[[ chosen.cleft ]],
betas.df.subgroup = betas.df.subgroup[[ chosen.cleft ]],
cpg.islands.all, cpg.info, file.out, print = FALSE )
load( paste0( "annotation_overview_chosenSNPregions_",
file.names.root[ chosen.cleft ], ".RData" ) )
betas.df.subgroup.all <- union( betas.df.subgroup.all, betas.df.subgroup.new.reg.feat )
}
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
## Warning: Column `CpG` joining character vector and factor, coercing into
## character vector
betas.df.subgroup.all <- betas.df.subgroup.all[ -1, ]
# for ordering of CpGs with their position:
betas.df.subgroup.new.reg.feat.ordered <- betas.df.subgroup.all[
order( betas.df.subgroup.all$name, betas.df.subgroup.all$position ), ]
betas.df.subgroup.new.reg.feat.ordered$cpg.order <- with(
betas.df.subgroup.new.reg.feat.ordered, paste( CpG, name, sep = "." ) )
# changing the order of factor levels:
betas.df.subgroup.new.reg.feat.ordered$ensembl_reg_feat <- factor(
betas.df.subgroup.new.reg.feat.ordered$ensembl_reg_feat,
levels = c( "enhancer", "enhancer-gene", "gene", "promoter", "promoter-gene",
"promoter_flank", "promoter_flank-gene", "none" ), ordered = TRUE )
# PLOTTING
# function for extracting the text for x axis labelling
xlab.fun <- function(x){
sapply( x, function(y){
splitted <- unlist( strsplit( y, split = ".", fixed = TRUE ) )
l <- length( splitted )
last.splitted <- splitted[ l ]
# there is one special case when the name of the locus has a dot
if( last.splitted == "3" ){
# return the name of cpg
return( paste( splitted[ -( (l-1):l ) ], collapse = "." ) )
}
return( paste( splitted[ -l ], collapse = "." ) )
} )
}
# facet names - SNP id and gene in parenthesis
tmp.all.snp.names <- tolower( as.character( my.SNP.map$Marker[ match(
betas.df.subgroup.new.reg.feat.ordered$name, my.SNP.map$gene_name ) ] ) )
any( is.na( tmp.all.snp.names ) )
## [1] FALSE
tmp.all.gene.names <- as.character( betas.df.subgroup.new.reg.feat.ordered$name )
# tmp.all.gene.names[ tmp.all.gene.names == "hCG_1814486" ] <- "intergenic"
betas.df.subgroup.new.reg.feat.ordered$labels <- paste0( tmp.all.snp.names, " (",
tmp.all.gene.names, ")" )
# plotting the beta values
p2 <- ggplot( betas.df.subgroup.new.reg.feat.ordered,
aes( x = factor( cpg.order, levels = unique( cpg.order ) ), y = beta ) )
p2 + geom_point( aes( color = ensembl_reg_feat, shape = as.factor( CGI ) ) ) +
facet_wrap( ~ labels, scales = "free_x", ncol = 2 ) +
scale_color_brewer( name = "regulatory feature", type = "qual", palette = "Dark2" ) +
scale_shape_discrete( name = "CGI" ) +
scale_x_discrete( labels = xlab.fun ) +
theme( legend.position = "bottom", axis.text.x = element_text( angle = 90 ) ) +
ylab( "beta value" ) + xlab( "CpG" )

ggsave( "Overview_b-val_ensembl_reg_feat_chosenSNPregions_all.jpg",
height = 20, width = 14, units = "in" )