# checking the rs227731 x prom (PoOxMe) significant interaction

library( ff )
## Loading required package: bit
## Attaching package bit
## package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)
## creators: bit bitwhich
## coercion: as.logical as.integer as.bit as.bitwhich which
## operator: ! & | xor != ==
## querying: print length any all min max range sum summary
## bit access: length<- [ [<- [[ [[<-
## for more help type ?bit
## 
## Attaching package: 'bit'
## The following object is masked from 'package:base':
## 
##     xor
## Attaching package ff
## - getOption("fftempdir")=="/tmp/Rtmpc6xwNS"
## - getOption("ffextension")=="ff"
## - getOption("ffdrop")==TRUE
## - getOption("fffinonexit")==TRUE
## - getOption("ffpagesize")==65536
## - getOption("ffcaching")=="mmnoflush"  -- consider "ffeachflush" if your system stalls on large writes
## - getOption("ffbatchbytes")==16777216 -- consider a different value for tuning your system
## - getOption("ffmaxbytes")==536870912 -- consider a different value for tuning your system
## 
## Attaching package: 'ff'
## The following objects are masked from 'package:bit':
## 
##     clone, clone.default, clone.list
## The following objects are masked from 'package:utils':
## 
##     write.csv, write.csv2
## The following objects are masked from 'package:base':
## 
##     is.factor, is.ordered
library( dplyr )
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library( tidyr )
library( ggplot2 )

source( "common_variables.R" )
chosen.cleft <- "CL/P"

load( "newest_betas_prepared.RData" )
load( "new_betas_matrices_ensembl_promoter_enhancer.RData" )
load( "new_betas_df_subgroup_gene.RData" )

map.ok <- read.table( "new_SNPs.map", header = TRUE, stringsAsFactors = FALSE )

chosen.SNP <- "rs227731"
chosen.gene <- unlist( dplyr::filter( my.SNP.map, Marker == chosen.SNP ) %>%
                         dplyr::select( gene_name ) )
chosen.gene <- as.character( levels( chosen.gene )[ chosen.gene ] )
chosen.CpGs <- c( "ch.17.52148184", "cg24806663" )
# below, I restrict all the data to the "chosen.SNP" and "chosen.CpGs"

chosen.betas <- betas.matrix.promoter.list[[ chosen.cleft ]][[ chosen.gene ]]
betas.sum.by.promoter <- rowSums( chosen.betas )/ncol( chosen.betas )
out <- quantile( as.numeric( betas.sum.by.promoter ), probs = c( 0, (1:3)/3 ) )
out[ 1 ] <- out[ 1 ] - 1
out[ 4 ] <- out[ 4 ] + 1
betas.breaks.promoter <- out
cat( "Grouping limits, promoter regions:\n")
## Grouping limits, promoter regions:
betas.breaks.promoter
##         0%  33.33333%  66.66667%       100% 
## -0.5120293  0.5118704  0.5216657  1.5606337
out <- cut( as.numeric( betas.sum.by.promoter ),
    breaks = as.numeric( betas.breaks.promoter ),
    include.lowest = TRUE, labels = FALSE )
betas.promoter.cat <- data.frame( name = chosen.gene,
          SNP = chosen.SNP,
          indiv = names( betas.sum.by.promoter ),
                    group = as.factor( out ), stringsAsFactors = FALSE )
# plotting the values per individual, groupped by the stratum
chosen.betas.df <- as.data.frame( chosen.betas )
chosen.betas.df$indiv <- rownames( chosen.betas )
chosen.betas.df <- gather( chosen.betas.df, key = CpG, value = beta,
                           c( "cg24806663", "ch.17.52148184R" ) )
plot.df <- left_join( betas.promoter.cat, chosen.betas.df, by = "indiv" )
ggplot( plot.df, aes( x = CpG, y = indiv, color = beta ) ) +
  geom_point() +
  facet_grid( rows = vars( group ) )

ggplot( plot.df, aes( x = 1, y = indiv ) ) +
  geom_raster( aes( fill = beta ) ) +
  facet_grid( rows = vars( group ), cols = vars( CpG ))

plot.df.cpg1 <- bind_cols( betas.promoter.cat,
          as.data.frame( chosen.betas )[ ,1, drop = FALSE ] )
plot.df.cpg2 <- bind_cols( betas.promoter.cat,
          as.data.frame( chosen.betas )[ ,2, drop = FALSE ] )
names( plot.df.cpg1 )[ 5 ] <- "beta"
names( plot.df.cpg2 )[ 5 ] <- "beta"

plot.df.cpg1.ordered <- arrange( plot.df.cpg1, group, beta )
plot.df.cpg2.ordered <- plot.df.cpg2[ match( plot.df.cpg1.ordered$indiv, plot.df.cpg2$indiv ), ]
identical( plot.df.cpg2.ordered$indiv, plot.df.cpg1.ordered$indiv )
## [1] TRUE
plot.df.cpg1.ordered$indiv <- factor( plot.df.cpg1.ordered$indiv,
  levels = plot.df.cpg1.ordered$indiv, ordered = TRUE )
plot.df.cpg2.ordered$indiv <- factor( plot.df.cpg2.ordered$indiv,
  levels = plot.df.cpg2.ordered$indiv, ordered = TRUE )

ggplot( plot.df.cpg1.ordered, aes( x = 1, y = indiv ) ) +
  geom_raster( aes( fill = beta ) ) +
  facet_grid( rows = vars( group ), scales = "free_y", switch = "y" ) +
  ylab( "Individual" ) +
  labs( title = "cg24806663" ) +
  theme( axis.text = element_blank(),
         axis.ticks = element_blank(),
         axis.title.x = element_blank(),
         legend.position = "bottom" )

ggsave( filename = "methylation_cg24806663_raster.png", width = 3, height = 10 )


ggplot( plot.df.cpg2.ordered, aes( x = 1, y = indiv ) ) +
  geom_raster( aes( fill = beta ) ) +
  facet_grid( rows = vars( group ), scales = "free_y" ) +
  ylab( "Individual" ) +
  labs( title = "ch.17.52148184R" ) +
  theme( axis.text = element_blank(),
         axis.ticks = element_blank(),
         axis.title.x = element_blank(),
         legend.position = "bottom" )

ggsave( filename = "methylation_ch-17-52148184R_raster.png", width = 3, height = 10 )