# ==== QQ plots ====
library( SuppDists )
library( MASS )
library( Haplin )
## Registered S3 methods overwritten by 'ffbase':
##   method   from
##   [.ff     ff  
##   [.ffdf   ff  
##   [<-.ff   ff  
##   [<-.ffdf ff
library( ggplot2 )
library( ggrepel )

source( "gg_qqplot.R" )

# ==== CL dataset ====
# CL dataset
# ==== GxM ====
# GxM
load( "../GxM_PoOxM_new_betas_mult_CLO.RData" )

gxe.pval.promoter.cl <- t( sapply( gxe.prom.all, function(x){
    return( data.frame( pval = x$gxm$pval[ 2 ], df = x$gxm$df[ 2 ] ) )
} ) )

gxe.pval.enhancer.cl <- t( sapply( gxe.enh.all, function(x){
    return( data.frame( pval = x$gxm$gxe.test$pval[ 2 ], df = x$gxm$gxe.test$df[ 2 ] ) )
} ) )

gxe.pval.gene.cl <- t( sapply( gxe.gene.all, function(x){
    return( data.frame( pval = x$gxm$gxe.test$pval[ 2 ], df = x$gxm$gxe.test$df[ 2 ] ) )
} ) )

# creating the data.frame
cur.snp.names <- as.character( my.SNP.map$Marker[ match( c( rownames( gxe.pval.promoter.cl ),
        rownames( gxe.pval.enhancer.cl ), rownames( gxe.pval.gene.cl ) ),
            my.SNP.map$gene_name ) ] )

gxe.pval.all.cl <- data.frame( region = c( rep( "promoter", nrow( gxe.pval.promoter.cl ) ),
        rep( "enhancer", nrow( gxe.pval.enhancer.cl ) ), rep( "gene", nrow( gxe.pval.gene.cl ) ) ),
        gene.name = c( rownames( gxe.pval.promoter.cl ), rownames( gxe.pval.enhancer.cl ),
          rownames( gxe.pval.gene.cl ) ),
        snp.name = cur.snp.names,
        pval = unlist( c( gxe.pval.promoter.cl[ , "pval" ], gxe.pval.enhancer.cl[ , "pval" ],
          gxe.pval.gene.cl[ , "pval" ] ) ),
        df = unlist( c( gxe.pval.promoter.cl[ , "df" ], gxe.pval.enhancer.cl[ , "df" ],
          gxe.pval.gene.cl[ , "df" ] ) ) )
head( gxe.pval.all.cl )
##     region gene.name   snp.name       pval df
## 1 promoter      PAX7   rs742071 0.06400049  2
## 2 promoter     ABCA4   rs560426 0.42805451  2
## 3 promoter      IRF6   rs642961 0.55340321  2
## 4 promoter     THADA  rs7590268 0.17320790  2
## 5 promoter    8q21.3 rs12543318 0.32332097  2
## 6 promoter      8q24   rs987525 0.42999212  2
dim( gxe.pval.all.cl )
## [1] 25  5
df.all.cl <- unique( gxe.pval.all.cl$df )
df.all.cl
## [1] 2
# plotting
pvals.GxM.CL <- gxe.pval.all.cl$pval
names( pvals.GxM.CL ) <- paste( gxe.pval.all.cl$region, gxe.pval.all.cl$snp.name,
  sep = "_" )
pvals.GxM.CL
##   promoter_rs742071   promoter_rs560426   promoter_rs642961 
##          0.06400049          0.42805451          0.55340321 
##  promoter_rs7590268 promoter_rs12543318   promoter_rs987525 
##          0.17320790          0.32332097          0.42999212 
##  promoter_rs3758249  promoter_rs8001641  promoter_rs1873147 
##          0.87703914          0.53417280          0.39764783 
##   promoter_rs227731 promoter_rs13041247   enhancer_rs742071 
##          0.94003164          0.55666521          0.34534483 
##   enhancer_rs560426  enhancer_rs7590268   enhancer_rs987525 
##          0.90704433          0.10178491          0.41424021 
##  enhancer_rs7078160  enhancer_rs8001641       gene_rs742071 
##          0.65667196          0.63076406          0.91766032 
##       gene_rs560426       gene_rs642961      gene_rs7590268 
##          0.34030217          0.15917225          0.96394185 
##      gene_rs3758249      gene_rs7078160      gene_rs1873147 
##          0.79767473          0.65667196          0.63297345 
##     gene_rs13041247 
##          0.92200517
cur.len <- length( pvals.GxM.CL )
cur.nlabs <- 2

cur.xcoords <- get_x_coords( length( pvals.GxM.CL ), cur.nlabs )
cur.ycoords <- -log10( sort( pvals.GxM.CL ) )[ 1:cur.nlabs ]

cur.labels <- data.frame( x = cur.xcoords, y = cur.ycoords,
  label = names( cur.ycoords ) )
gg_qqplot( pvals.GxM.CL ) +
  labs( title = "GxMe, CLO dataset" ) +
  geom_label_repel( data = cur.labels, aes( x, y, label = label ) ) +
  theme( axis.text = element_text( size = 12 ), axis.title = element_text( size = 14 ),
         title = element_text( size = 16 ) )
## Warning: Removed 2 rows containing missing values (geom_path).

ggsave( filename = "GxM_CL_mult_betas_gg-qqplot_square.png", width = 14, units = "cm",
        height = 14 )
## Warning: Removed 2 rows containing missing values (geom_path).
# ==== POOxM ====
# now, PoOxM for CLO
pooxm.pval.promoter <- t( sapply( gxe.prom.all, function(x){
    return( data.frame( pval = x$pooxm$pval[ 2 ], df = x$pooxm$df[ 2 ] ) )
} ) )

pooxm.pval.enhancer <- t( sapply( gxe.enh.all, function(x){
    return( data.frame( pval = x$pooxm$gxe.test$pval[ 2 ], df = x$pooxm$gxe.test$df[ 2 ] ) )
} ) )

pooxm.pval.gene <- t( sapply( gxe.gene.all, function(x){
    return( data.frame( pval = x$pooxm$gxe.test$pval[ 2 ], df = x$pooxm$gxe.test$df[ 2 ] ) )
} ) )

# creating the data.frame
my.SNP.map$Marker <- tolower( my.SNP.map$Marker )
cur.snp.names <- my.SNP.map$Marker[ match( c( rownames( pooxm.pval.promoter ),
        rownames( pooxm.pval.enhancer ), rownames( pooxm.pval.gene ) ),
            my.SNP.map$gene_name ) ]

pooxm.pval.all <- data.frame( region = c( rep( "promoter", nrow( pooxm.pval.promoter ) ),
        rep( "enhancer", nrow( pooxm.pval.enhancer ) ), rep( "gene", nrow( pooxm.pval.gene ) ) ),
        gene.name = c( rownames( pooxm.pval.promoter ), rownames( pooxm.pval.enhancer ), rownames( pooxm.pval.gene ) ),
        snp.name = cur.snp.names,
        pval = unlist( c( pooxm.pval.promoter[ , "pval" ], pooxm.pval.enhancer[ , "pval" ], pooxm.pval.gene[ , "pval" ] ) ),
        df = unlist( c( pooxm.pval.promoter[ , "df" ], pooxm.pval.enhancer[ , "df" ], pooxm.pval.gene[ , "df" ] ) ) )
head( pooxm.pval.all )
##     region gene.name   snp.name       pval df
## 1 promoter      PAX7   rs742071 0.92671452  2
## 2 promoter     ABCA4   rs560426 0.71507668  2
## 3 promoter      IRF6   rs642961 0.10381984  2
## 4 promoter     THADA  rs7590268 0.60047269  2
## 5 promoter    8q21.3 rs12543318 0.02428055  2
## 6 promoter      8q24   rs987525 0.08685998  2
dim( pooxm.pval.all )
## [1] 25  5
df.all.pooxm <- unique( pooxm.pval.all$df )
df.all.pooxm
## [1] 2
# plotting
pvals.POOxM.CL <- pooxm.pval.all$pval
names( pvals.POOxM.CL ) <- paste( pooxm.pval.all$region, pooxm.pval.all$snp.name, sep = "_" )
pvals.POOxM.CL
##   promoter_rs742071   promoter_rs560426   promoter_rs642961 
##          0.92671452          0.71507668          0.10381984 
##  promoter_rs7590268 promoter_rs12543318   promoter_rs987525 
##          0.60047269          0.02428055          0.08685998 
##  promoter_rs3758249  promoter_rs8001641  promoter_rs1873147 
##          0.61687631          0.14989054          0.32261930 
##   promoter_rs227731 promoter_rs13041247   enhancer_rs742071 
##          0.01193684          0.31593297          0.54294633 
##   enhancer_rs560426  enhancer_rs7590268   enhancer_rs987525 
##          0.95324526          0.11209232          0.52952510 
##  enhancer_rs7078160  enhancer_rs8001641       gene_rs742071 
##          0.55193606          0.02946621          0.29444331 
##       gene_rs560426       gene_rs642961      gene_rs7590268 
##          0.01350852          0.91256337          0.33880289 
##      gene_rs3758249      gene_rs7078160      gene_rs1873147 
##          0.57162216          0.55193606          0.07496376 
##     gene_rs13041247 
##          0.40997150
cur.len <- length( pvals.POOxM.CL )
cur.nlabs <- 8

cur.xcoords <- get_x_coords( length( pvals.POOxM.CL ), cur.nlabs )
cur.ycoords <- -log10( sort( pvals.POOxM.CL ) )[ 1:cur.nlabs ]

cur.labels <- data.frame( x = cur.xcoords, y = cur.ycoords,
  label = names( cur.ycoords ) )
gg_qqplot( pvals.POOxM.CL ) +
  labs( title = "PoOxMe, CLO dataset" ) +
  geom_label_repel( data = cur.labels, aes( x, y, label = label ), xlim = c( 1,NA ) ) +
  theme( axis.text = element_text( size = 12 ), axis.title = element_text( size = 14 ),
         title = element_text( size = 16 ) )
## Warning: Removed 2 rows containing missing values (geom_path).

ggsave( filename = "PoOxM_CL_mult_betas_gg-qqplot_square.png", width = 14, units = "cm",
        height = 14 )
## Warning: Removed 2 rows containing missing values (geom_path).
# ==== CL/P dataset ====
load( "../GxM_PoOxM_new_betas_mult_CLorP.RData" )

# ==== GxM ====
# GxM
gxe.pval.promoter.clorp <- t( sapply( gxe.prom.all, function(x){
    return( data.frame( pval = x$gxm$pval[ 2 ], df = x$gxm$df[ 2 ] ) )
} ) )

gxe.pval.enhancer.clorp <- t( sapply( gxe.enh.all, function(x){
    return( data.frame( pval = x$gxm$gxe.test$pval[ 2 ], df = x$gxm$gxe.test$df[ 2 ] ) )
} ) )

gxe.pval.gene.clorp <- t( sapply( gxe.gene.all, function(x){
    return( data.frame( pval = x$gxm$gxe.test$pval[ 2 ], df = x$gxm$gxe.test$df[ 2 ] ) )
} ) )

# creating the data.frame
cur.snp.names <- as.character( my.SNP.map$Marker[ match( c( rownames( gxe.pval.promoter.clorp ),
        rownames( gxe.pval.enhancer.clorp ), rownames( gxe.pval.gene.clorp ) ),
            my.SNP.map$gene_name ) ] )

gxe.pval.all.clorp <- data.frame( region = c( rep( "promoter", nrow( gxe.pval.promoter.clorp ) ),
        rep( "enhancer", nrow( gxe.pval.enhancer.clorp ) ), rep( "gene", nrow( gxe.pval.gene.clorp ) ) ),
        gene.name = c( rownames( gxe.pval.promoter.clorp ), rownames( gxe.pval.enhancer.clorp ),
          rownames( gxe.pval.gene.clorp ) ),
        snp.name = cur.snp.names,
        pval = unlist( c( gxe.pval.promoter.clorp[ , "pval" ], gxe.pval.enhancer.clorp[ , "pval" ],
          gxe.pval.gene.clorp[ , "pval" ] ) ),
        df = unlist( c( gxe.pval.promoter.clorp[ , "df" ], gxe.pval.enhancer.clorp[ , "df" ],
          gxe.pval.gene.clorp[ , "df" ] ) ) )
head( gxe.pval.all.clorp )
##     region gene.name   snp.name       pval df
## 1 promoter      PAX7   rs742071 0.19699287  2
## 2 promoter     ABCA4   rs560426 0.78069733  2
## 3 promoter      IRF6   rs642961 0.79869026  2
## 4 promoter     THADA  rs7590268 0.69939504  2
## 5 promoter    8q21.3 rs12543318 0.01231238  2
## 6 promoter      8q24   rs987525 0.11788477  2
dim( gxe.pval.all.clorp )
## [1] 25  5
df.all.clorp <- unique( gxe.pval.all.clorp$df )
df.all.clorp
## [1] 2
# plotting
pvals.GxM.clorp <- gxe.pval.all.clorp$pval
names( pvals.GxM.clorp ) <- paste( gxe.pval.all.clorp$region, gxe.pval.all.clorp$snp.name,
  sep = "_" )
pvals.GxM.clorp
##   promoter_rs742071   promoter_rs560426   promoter_rs642961 
##          0.19699287          0.78069733          0.79869026 
##  promoter_rs7590268 promoter_rs12543318   promoter_rs987525 
##          0.69939504          0.01231238          0.11788477 
##  promoter_rs3758249  promoter_rs8001641  promoter_rs1873147 
##          0.65311965          0.05713693          0.30101309 
##   promoter_rs227731 promoter_rs13041247   enhancer_rs742071 
##          0.78033536          0.87885490          0.59389868 
##   enhancer_rs560426  enhancer_rs7590268   enhancer_rs987525 
##          0.64617122          0.48595842          0.44971466 
##  enhancer_rs7078160  enhancer_rs8001641       gene_rs742071 
##          0.63353369          0.14277888          0.29639614 
##       gene_rs560426       gene_rs642961      gene_rs7590268 
##          0.12167850          0.58935782          0.64241648 
##      gene_rs3758249      gene_rs7078160      gene_rs1873147 
##          0.06864719          0.63353369          0.83821146 
##     gene_rs13041247 
##          0.81390511
cur.len <- length( pvals.GxM.clorp )
cur.nlabs <- 2

cur.xcoords <- get_x_coords( length( pvals.GxM.clorp ), cur.nlabs )
cur.ycoords <- -log10( sort( pvals.GxM.clorp ) )[ 1:cur.nlabs ]

cur.labels <- data.frame( x = cur.xcoords, y = cur.ycoords,
  label = names( cur.ycoords ) )
gg_qqplot( pvals.GxM.clorp ) +
  labs( title = "GxMe, CL/P dataset" ) +
  geom_label_repel( data = cur.labels, aes( x, y, label = label ) ) +
  theme( axis.text = element_text( size = 12 ), axis.title = element_text( size = 14 ),
         title = element_text( size = 16 ) )
## Warning: Removed 2 rows containing missing values (geom_path).

ggsave( filename = "GxM_CLorP_mult_betas_gg-qqplot_square.png", width = 14, units = "cm",
        height = 14 )
## Warning: Removed 2 rows containing missing values (geom_path).
# ==== POOxM ====
# now, PoOxM for CL/P
pooxm.pval.promoter <- t( sapply( gxe.prom.all, function(x){
    return( data.frame( pval = x$pooxm$pval[ 2 ], df = x$pooxm$df[ 2 ] ) )
} ) )

pooxm.pval.enhancer <- t( sapply( gxe.enh.all, function(x){
    return( data.frame( pval = x$pooxm$gxe.test$pval[ 2 ], df = x$pooxm$gxe.test$df[ 2 ] ) )
} ) )

pooxm.pval.gene <- t( sapply( gxe.gene.all, function(x){
    return( data.frame( pval = x$pooxm$gxe.test$pval[ 2 ], df = x$pooxm$gxe.test$df[ 2 ] ) )
} ) )

# creating the data.frame
my.SNP.map$Marker <- tolower( my.SNP.map$Marker )
cur.snp.names <- my.SNP.map$Marker[ match( c( rownames( pooxm.pval.promoter ),
        rownames( pooxm.pval.enhancer ), rownames( pooxm.pval.gene ) ),
            my.SNP.map$gene_name ) ]

pooxm.pval.all <- data.frame( region = c( rep( "promoter", nrow( pooxm.pval.promoter ) ),
        rep( "enhancer", nrow( pooxm.pval.enhancer ) ), rep( "gene", nrow( pooxm.pval.gene ) ) ),
        gene.name = c( rownames( pooxm.pval.promoter ), rownames( pooxm.pval.enhancer ), rownames( pooxm.pval.gene ) ),
        snp.name = cur.snp.names,
        pval = unlist( c( pooxm.pval.promoter[ , "pval" ], pooxm.pval.enhancer[ , "pval" ], pooxm.pval.gene[ , "pval" ] ) ),
        df = unlist( c( pooxm.pval.promoter[ , "df" ], pooxm.pval.enhancer[ , "df" ], pooxm.pval.gene[ , "df" ] ) ) )
head( pooxm.pval.all )
##     region gene.name   snp.name      pval df
## 1 promoter      PAX7   rs742071 0.4903087  2
## 2 promoter     ABCA4   rs560426 0.7649082  2
## 3 promoter      IRF6   rs642961 0.3370442  2
## 4 promoter     THADA  rs7590268 0.4365098  2
## 5 promoter    8q21.3 rs12543318 0.4206556  2
## 6 promoter      8q24   rs987525 0.0584274  2
dim( pooxm.pval.all )
## [1] 25  5
df.all.pooxm <- unique( pooxm.pval.all$df )
df.all.pooxm
## [1] 2
# plotting
pvals.POOxM.clorp <- pooxm.pval.all$pval
names( pvals.POOxM.clorp ) <- paste( pooxm.pval.all$region, pooxm.pval.all$snp.name, sep = "_" )
pvals.POOxM.clorp
##   promoter_rs742071   promoter_rs560426   promoter_rs642961 
##         0.490308668         0.764908243         0.337044234 
##  promoter_rs7590268 promoter_rs12543318   promoter_rs987525 
##         0.436509753         0.420655613         0.058427399 
##  promoter_rs3758249  promoter_rs8001641  promoter_rs1873147 
##         0.059053176         0.343891913         0.126549691 
##   promoter_rs227731 promoter_rs13041247   enhancer_rs742071 
##         0.003223564         0.105488490         0.434832066 
##   enhancer_rs560426  enhancer_rs7590268   enhancer_rs987525 
##         0.547990224         0.054367326         0.867650691 
##  enhancer_rs7078160  enhancer_rs8001641       gene_rs742071 
##         0.134494271         0.608589146         0.997708266 
##       gene_rs560426       gene_rs642961      gene_rs7590268 
##         0.906781682         0.014342080         0.486847137 
##      gene_rs3758249      gene_rs7078160      gene_rs1873147 
##         0.040261280         0.134494271         0.420966767 
##     gene_rs13041247 
##         0.211418716
cur.len <- length( pvals.POOxM.clorp )
cur.nlabs <- 8

cur.xcoords <- get_x_coords( length( pvals.POOxM.clorp ), cur.nlabs )
cur.ycoords <- -log10( sort( pvals.POOxM.clorp ) )[ 1:cur.nlabs ]

cur.labels <- data.frame( x = cur.xcoords, y = cur.ycoords,
  label = names( cur.ycoords ) )
gg_qqplot( pvals.POOxM.clorp ) +
  labs( title = "PoOxMe, CL/P dataset" ) +
  geom_label_repel( data = cur.labels, aes( x, y, label = label ), #xlim = c( 1.5,NA ),
                    force = 10, nudge_y = cur.labels$x - 1, nudge_x = 2 - cur.labels$x ) +
  theme( axis.text = element_text( size = 12 ), axis.title = element_text( size = 14 ),
         title = element_text( size = 16 ) )

ggsave( filename = "PoOxM_CLorP_mult_betas_gg-qqplot_square.png", width = 14, units = "cm",
        height = 14 )



# ==== CLP dataset ====
load( "../GxM_PoOxM_new_betas_mult_CLP.RData" )

# ==== GxM ====
# GxM
gxe.pval.promoter.clp <- t( sapply( gxe.prom.all, function(x){
    return( data.frame( pval = x$gxm$pval[ 2 ], df = x$gxm$df[ 2 ] ) )
} ) )

gxe.pval.enhancer.clp <- t( sapply( gxe.enh.all, function(x){
    return( data.frame( pval = x$gxm$gxe.test$pval[ 2 ], df = x$gxm$gxe.test$df[ 2 ] ) )
} ) )

gxe.pval.gene.clp <- t( sapply( gxe.gene.all, function(x){
    return( data.frame( pval = x$gxm$gxe.test$pval[ 2 ], df = x$gxm$gxe.test$df[ 2 ] ) )
} ) )

# creating the data.frame
cur.snp.names <- as.character( my.SNP.map$Marker[ match( c( rownames( gxe.pval.promoter.clp ),
        rownames( gxe.pval.enhancer.clp ), rownames( gxe.pval.gene.clp ) ),
            my.SNP.map$gene_name ) ] )

gxe.pval.all.clp <- data.frame( region = c( rep( "promoter", nrow( gxe.pval.promoter.clp ) ),
        rep( "enhancer", nrow( gxe.pval.enhancer.clp ) ), rep( "gene", nrow( gxe.pval.gene.clp ) ) ),
        gene.name = c( rownames( gxe.pval.promoter.clp ), rownames( gxe.pval.enhancer.clp ),
          rownames( gxe.pval.gene.clp ) ),
        snp.name = cur.snp.names,
        pval = unlist( c( gxe.pval.promoter.clp[ , "pval" ], gxe.pval.enhancer.clp[ , "pval" ],
          gxe.pval.gene.clp[ , "pval" ] ) ),
        df = unlist( c( gxe.pval.promoter.clp[ , "df" ], gxe.pval.enhancer.clp[ , "df" ],
          gxe.pval.gene.clp[ , "df" ] ) ) )
head( gxe.pval.all.clp )
##     region gene.name   snp.name      pval df
## 1 promoter      PAX7   rs742071 0.2806608  2
## 2 promoter     ABCA4   rs560426 0.2512834  2
## 3 promoter      IRF6   rs642961 0.8077746  2
## 4 promoter     THADA  rs7590268 0.6258189  2
## 5 promoter    8q21.3 rs12543318 0.2889103  2
## 6 promoter      8q24   rs987525 0.2100116  2
dim( gxe.pval.all.clp )
## [1] 25  5
df.all.clp <- unique( gxe.pval.all.clp$df )
df.all.clp
## [1] 2
# plotting
pvals.GxM.clp <- gxe.pval.all.clp$pval
names( pvals.GxM.clp ) <- paste( gxe.pval.all.clp$region, gxe.pval.all.clp$snp.name,
  sep = "_" )
pvals.GxM.clp
##   promoter_rs742071   promoter_rs560426   promoter_rs642961 
##         0.280660840         0.251283381         0.807774607 
##  promoter_rs7590268 promoter_rs12543318   promoter_rs987525 
##         0.625818873         0.288910280         0.210011574 
##  promoter_rs3758249  promoter_rs8001641  promoter_rs1873147 
##         0.703687666         0.217081847         0.230290238 
##   promoter_rs227731 promoter_rs13041247   enhancer_rs742071 
##         0.786378069         0.955859512         0.500717570 
##   enhancer_rs560426  enhancer_rs7590268   enhancer_rs987525 
##         0.430706012         0.899082901         0.541993252 
##  enhancer_rs7078160  enhancer_rs8001641       gene_rs742071 
##         0.439422383         0.247811659         0.190955186 
##       gene_rs560426       gene_rs642961      gene_rs7590268 
##         0.686837728         0.885820221         0.975074491 
##      gene_rs3758249      gene_rs7078160      gene_rs1873147 
##         0.001039765         0.439422383         0.385127538 
##     gene_rs13041247 
##         0.842246318
cur.len <- length( pvals.GxM.clp )
cur.nlabs <- 2

cur.xcoords <- get_x_coords( length( pvals.GxM.clp ), cur.nlabs )
cur.ycoords <- -log10( sort( pvals.GxM.clp ) )[ 1:cur.nlabs ]

cur.labels <- data.frame( x = cur.xcoords, y = cur.ycoords,
  label = names( cur.ycoords ) )
gg_qqplot( pvals.GxM.clp ) +
  labs( title = "GxMe, CLP dataset" ) +
  geom_label_repel( data = cur.labels, aes( x, y, label = label ) ) +
  theme( axis.text = element_text( size = 12 ), axis.title = element_text( size = 14 ),
         title = element_text( size = 16 ) )