library( ff )
## Loading required package: bit
## Attaching package bit
## package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)
## creators: bit bitwhich
## coercion: as.logical as.integer as.bit as.bitwhich which
## operator: ! & | xor != ==
## querying: print length any all min max range sum summary
## bit access: length<- [ [<- [[ [[<-
## for more help type ?bit
##
## Attaching package: 'bit'
## The following object is masked from 'package:base':
##
## xor
## Attaching package ff
## - getOption("fftempdir")=="C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ"
## - getOption("ffextension")=="ff"
## - getOption("ffdrop")==TRUE
## - getOption("fffinonexit")==TRUE
## - getOption("ffpagesize")==65536
## - getOption("ffcaching")=="mmnoflush" -- consider "ffeachflush" if your system stalls on large writes
## - getOption("ffbatchbytes")==1030781665.28 -- consider a different value for tuning your system
## - getOption("ffmaxbytes")==51539083264 -- consider a different value for tuning your system
##
## Attaching package: 'ff'
## The following objects are masked from 'package:bit':
##
## clone, clone.default, clone.list
## The following objects are masked from 'package:utils':
##
## write.csv, write.csv2
## The following objects are masked from 'package:base':
##
## is.factor, is.ordered
library( ggplot2 )
library( SuppDists )
library( Haplin )
library( dplyr )
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library( reshape2 )
library( matrixStats )
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
source( "gaphunter2.R" )
source( "../common_variables.R" )
map.ok <- read.table( "../new_SNPs.map", header = TRUE, stringsAsFactors = FALSE )
load( "../new_betas_matrices_ensembl_promoter_enhancer.RData" )
load( "../new_betas_df_subgroup_gene.RData" )
ls()
## [1] "betas.df.subgroup.enhancer.list" "betas.df.subgroup.gene.list"
## [3] "betas.df.subgroup.promoter.list" "betas.matrix.enhancer.list"
## [5] "betas.matrix.gene.list" "betas.matrix.promoter.list"
## [7] "cleft.types" "ffdata.file.names"
## [9] "file.names" "file.names.root"
## [11] "gaphunter2" "map.ok"
# just testing on one case:
dim( betas.matrix.gene.list$CLO$PAX7 )
## [1] 103 54
matrix.tmp <- t( betas.matrix.gene.list$CLO$PAX7 )
gaphunter.res <- gaphunter2( matrix.tmp )
## [gaphunter] Using 54 probes and 103 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found 2 gap signals.
## [gaphunter] Filtering out gap signals driven by outliers.
## [gaphunter] Removed 0 gap signals driven by outliers from results.
gaphunter.res
## $proberesults
## Groups Group1 Group2 Group3
## cg04276953 3 1 96 6
## cg12080079 3 1 101 1
##
## $sampleresults
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## cg04276953 2 2 2 2 3 2 3 2 2 2 2 2
## cg12080079 2 2 2 2 2 2 2 2 2 2 2 2
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22]
## cg04276953 2 2 2 2 2 2 2 2 2 1
## cg12080079 2 2 2 2 2 2 2 2 2 2
## [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32]
## cg04276953 2 2 2 2 2 2 2 2 2 2
## cg12080079 2 2 2 2 3 2 2 2 2 2
## [,33] [,34] [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42]
## cg04276953 2 2 2 2 2 2 2 2 2 2
## cg12080079 2 2 2 2 2 2 2 2 2 2
## [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50] [,51] [,52]
## cg04276953 2 2 2 2 3 2 2 2 2 2
## cg12080079 2 2 2 2 2 2 2 2 2 2
## [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
## cg04276953 2 2 2 2 2 2 2 2 2 2
## cg12080079 2 2 2 2 2 2 2 2 2 2
## [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72]
## cg04276953 2 2 2 3 2 2 2 2 2 2
## cg12080079 2 2 2 1 2 2 2 2 2 2
## [,73] [,74] [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82]
## cg04276953 2 2 2 2 2 2 2 2 2 2
## cg12080079 2 2 2 2 2 2 2 2 2 2
## [,83] [,84] [,85] [,86] [,87] [,88] [,89] [,90] [,91] [,92]
## cg04276953 2 2 2 2 2 2 2 3 2 2
## cg12080079 2 2 2 2 2 2 2 2 2 2
## [,93] [,94] [,95] [,96] [,97] [,98] [,99] [,100] [,101] [,102]
## cg04276953 2 2 2 2 2 3 2 2 2 2
## cg12080079 2 2 2 2 2 2 2 2 2 2
## [,103]
## cg04276953 2
## cg12080079 2
##
## $algorithm
## $algorithm$threshold
## [1] 0.05
##
## $algorithm$outCutoff
## [1] 0.01
##
## $algorithm$keepOutliers
## [1] FALSE
# combine the matrices to include all the individuals (per gene)
# -- for the gene regions --
sapply( betas.matrix.gene.list, length )
## CLO CL/P CPO CLP cntrl
## 8 8 8 8 8
genes <- names( betas.matrix.gene.list[[ 1 ]] )
genes
## [1] "PAX7" "ABCA4" "IRF6" "THADA" "FOXE1" "KIAA1598"
## [7] "TPM1" "MAFB"
betas.matrix.gene.all <- lapply( genes, function( x ){
tmp.list <- lapply( betas.matrix.gene.list, function( y ){
return( y[[ x ]] )
} )
tmp.m <- do.call( rbind, tmp.list )
unique.indiv <- unique( rownames( tmp.m ) )
out.m <- tmp.m[ unique.indiv,, drop = FALSE ]
return( out.m )
} )
names( betas.matrix.gene.all ) <- genes
# -- for the enhancer regions --
genes <- names( betas.matrix.enhancer.list[[ 1 ]] )
genes
## [1] "PAX7" "ABCA4" "THADA" "8q24" "KIAA1598" "SPRY2"
betas.matrix.enhancer.all <- lapply( genes, function( x ){
tmp.list <- lapply( betas.matrix.enhancer.list, function( y ){
return( y[[ x ]] )
} )
tmp.m <- do.call( rbind, tmp.list )
unique.indiv <- unique( rownames( tmp.m ) )
out.m <- tmp.m[ unique.indiv,, drop = FALSE ]
return( out.m )
} )
names( betas.matrix.enhancer.all ) <- genes
# -- for the promoter regions --
genes <- names( betas.matrix.promoter.list[[ 1 ]] )
genes
## [1] "PAX7" "ABCA4" "IRF6" "THADA" "8q21.3" "8q24" "FOXE1"
## [8] "SPRY2" "TPM1" "NOG1" "MAFB"
betas.matrix.promoter.all <- lapply( genes, function( x ){
tmp.list <- lapply( betas.matrix.promoter.list, function( y ){
return( y[[ x ]] )
} )
tmp.m <- do.call( rbind, tmp.list )
unique.indiv <- unique( rownames( tmp.m ) )
out.m <- tmp.m[ unique.indiv,, drop = FALSE ]
return( out.m )
} )
names( betas.matrix.promoter.all ) <- genes
# running the gap hunter on all the subsets:
gaphunter.res.genes <- lapply( names( betas.matrix.gene.all ), function(x){
cat( " GENE: ",x, "\n")
m <- t( betas.matrix.gene.all[[ x ]] )
out <- gaphunter2( m )
cat( "\n")
return( out )
} )
## GENE: PAX7
## [gaphunter] Using 54 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found 1 gap signals.
## [gaphunter] Filtering out gap signals driven by outliers.
## [gaphunter] Removed 1 gap signals driven by outliers from results.
##
## GENE: ABCA4
## [gaphunter] Using 16 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found 2 gap signals.
## [gaphunter] Filtering out gap signals driven by outliers.
## [gaphunter] Removed 0 gap signals driven by outliers from results.
##
## GENE: IRF6
## [gaphunter] Using 27 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found 1 gap signals.
## [gaphunter] Filtering out gap signals driven by outliers.
## [gaphunter] Removed 1 gap signals driven by outliers from results.
##
## GENE: THADA
## [gaphunter] Using 14 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found 1 gap signals.
## [gaphunter] Filtering out gap signals driven by outliers.
## [gaphunter] Removed 1 gap signals driven by outliers from results.
##
## GENE: FOXE1
## [gaphunter] Using 7 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: KIAA1598
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: TPM1
## [gaphunter] Using 29 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: MAFB
## [gaphunter] Using 9 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
names( gaphunter.res.genes ) <- names( betas.matrix.gene.all )
gaphunter.res.prom <- lapply( names( betas.matrix.promoter.all ), function(x){
cat( " GENE: ",x, "\n")
m <- t( betas.matrix.promoter.all[[ x ]] )
out <- gaphunter2( m )
cat( "\n")
return( out )
} )
## GENE: PAX7
## [gaphunter] Using 19 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: ABCA4
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: IRF6
## [gaphunter] Using 14 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: THADA
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: 8q21.3
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: 8q24
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: FOXE1
## [gaphunter] Using 6 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: SPRY2
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: TPM1
## [gaphunter] Using 30 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: NOG1
## [gaphunter] Using 2 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: MAFB
## [gaphunter] Using 21 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found 1 gap signals.
## [gaphunter] Filtering out gap signals driven by outliers.
## [gaphunter] Removed 1 gap signals driven by outliers from results.
names( gaphunter.res.prom ) <- names( betas.matrix.promoter.all )
gaphunter.res.enh <- lapply( names( betas.matrix.enhancer.all ), function(x){
cat( " GENE: ",x, "\n")
m <- t( betas.matrix.enhancer.all[[ x ]] )
out <- gaphunter2( m )
cat( "\n")
return( out )
} )
## GENE: PAX7
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: ABCA4
## [gaphunter] Using 6 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: THADA
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: 8q24
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: KIAA1598
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
##
## GENE: SPRY2
## [gaphunter] Using 1 probes and 865 samples.
## [gaphunter] Searching for gap signals.
## [gaphunter] Found no gap signals.
names( gaphunter.res.enh ) <- names( betas.matrix.enhancer.all )
# create a list with only the gap CpGs
which.res.empty <- sapply( gaphunter.res.genes, function(x){
if( is.null( x$proberesults ) ){
return( TRUE )
}
return( nrow( x$proberesults ) == 0 )
} )
gaphunter.res.genes <- gaphunter.res.genes[ !which.res.empty ]
if( length( gaphunter.res.genes ) != 0 ){
gaps.cpgs.genes <- lapply( names( gaphunter.res.genes ), function( x ){
cat( " GENE: ",x, "\n")
out <- rownames( gaphunter.res.genes[[ x ]]$sampleresults )
print( out )
cat( "\n")
return( out )
} )
names( gaps.cpgs.genes ) <- names( gaphunter.res.genes )
}
## GENE: ABCA4
## [1] "cg01921066" "cg04350215"
which.res.empty <- sapply( gaphunter.res.prom, function(x){
if( is.null( x$proberesults ) ){
return( TRUE )
}
return( nrow( x$proberesults ) == 0 )
} )
gaphunter.res.prom <- gaphunter.res.prom[ !which.res.empty ]
if( length( gaphunter.res.prom ) != 0 ){
gaps.cpgs.prom <- lapply( names( gaphunter.res.prom ), function( x ){
cat( " GENE: ",x, "\n")
out <- rownames( gaphunter.res.prom[[ x ]]$sampleresults )
print( out )
cat( "\n")
return( out )
} )
names( gaps.cpgs.prom ) <- names( gaphunter.res.prom )
}
which.res.empty <- sapply( gaphunter.res.enh, function(x){
if( is.null( x$proberesults ) ){
return( TRUE )
}
return( nrow( x$proberesults ) == 0 )
} )
gaphunter.res.enh <- gaphunter.res.enh[ !which.res.empty ]
if( length( gaphunter.res.enh ) != 0 ){
gaps.cpgs.enh <- lapply( names( gaphunter.res.enh ), function( x ){
cat( " GENE: ",x, "\n")
out <- rownames( gaphunter.res.enh[[ x ]]$sampleresults )
print( out )
cat( "\n")
return( out )
} )
names( gaps.cpgs.enh ) <- names( gaphunter.res.enh )
}
# create a data frame:
gaps.cpgs.genes.df <- melt( gaps.cpgs.genes )
gaps.cpgs.genes.df
## value L1
## 1 cg01921066 ABCA4
## 2 cg04350215 ABCA4
gaps.cpgs.all.df <- cbind( gaps.cpgs.genes.df, region = "gene" )
names.gaps.df <- c( "cpg", "gene", "region" )
names( gaps.cpgs.all.df ) <- names.gaps.df
gaps.cpgs.all.df <- arrange( gaps.cpgs.all.df, cpg )
gaps.cpgs.all.df
## cpg gene region
## 1 cg01921066 ABCA4 gene
## 2 cg04350215 ABCA4 gene
save( gaps.cpgs.genes, gaps.cpgs.all.df, file = "gaps_CpGs_all.RData" )
# grouping the output
gaps.cpgs.all.df <- group_by( gaps.cpgs.all.df, cpg )
gaps.cpgs.all.df
## # A tibble: 2 x 3
## # Groups: cpg [2]
## cpg gene region
## <fct> <chr> <fct>
## 1 cg01921066 ABCA4 gene
## 2 cg04350215 ABCA4 gene
cpg.all <- read.csv( "../../ORIG_DATA/cpg_info_all.csv", stringsAsFactors = FALSE )
snps.Iowa.all <- read.table( "../unique_SNPs.dat", stringsAsFactors = FALSE )
# genetic data - checking whether I have SNPs within the probes found
# to be bimodal by gaphunter
# for each CpG
unique.gaps.cpgs <- unique( gaps.cpgs.all.df$cpg )
for( cur.cpg in unique.gaps.cpgs ){
cat( "\n" )
cat( "CURRENT CpG: ", cur.cpg, "\n" )
cur.regions <- filter( gaps.cpgs.all.df, cpg == cur.cpg ) %>%
select( cpg, region ) %>% distinct()
# info about the curent CpG and the probe
cur.probe.snps <- filter( cpg.all, TargetID == cur.cpg ) %>%
select( TargetID, PROBE_SNPS )
if( cur.probe.snps$PROBE_SNPS != "" ){
cur.probe.snps <- cur.probe.snps$PROBE_SNPS
existing.snps <- cur.probe.snps[ cur.probe.snps %in% snps.Iowa.all ]
if( length( existing.snps ) != 0 ){
cat( " SNPs found: ", existing.snps, "\n" )
# for each SNP
# for( cur.snp in existing.snps ){
# cat( " CURRENT SNP: ", cur.snp, "\n" )
#
# haplin.data.Iowa <- genDataLoad( file.path( "..", ffdata.file.names[ chosen.cleft ] ) )
#
# ids.final <- read.table( file = file.path( "..", file.names[[ chosen.cleft ]] ),
# stringsAsFactors = FALSE, header = TRUE )
#
# which.cur.snp <- match( cur.snp, as.character( map.ok$snpname ) )
#
# haplin.data.cur.SNP <- genDataGetPart( haplin.data.Iowa, design = "triad",
# markers = which.cur.snp, overwrite = TRUE )
# haplin.data.cur.SNP
#
# if( "gene" %in% cur.regions ){
# betas.df.subgroup.tmp <- filter(
# betas.df.subgroup.gene.list[[ chosen.cleft ]], CpG == cur.cpg )
# } else if ( "promoter" %in% cur.regions ){
# betas.df.subgroup.tmp <- filter(
# betas.df.subgroup.promoter.list[[ chosen.cleft ]], CpG == cur.cpg )
# } else {
# betas.df.subgroup.tmp <- filter(
# betas.df.subgroup.enhancer.list[[ chosen.cleft ]], CpG == cur.cpg )
# }
# which.indiv.cur.cpg <- betas.df.subgroup.tmp$indiv
#
# unique.indiv <- unique( which.indiv.cur.cpg )
# fam.id <- as.character( sapply( unique.indiv, function(x){
# cur.row <- ids.final.cl[ ids.final.cl$ID.BSI == x, ]
# ids3.cl$cidr3[ cur.row$key2.fam ]
# } ) )
#
# file.subset.haplin <- paste( "CL_subset", cur.snp, cur.cpg, sep = "_" )
# haplin.subset.data <- genDataGetPart( data.in = haplin.data.cur.SNP, design = "triad",
# indiv.ids = fam.id, file.out = file.subset.haplin, overwrite = TRUE )
#
# cat( " ALLELE TABLE:\n" )
# allele.tab.cur.snp <- table( haplin.subset.data$gen.data[[1]][,] )
# allele.tab.cur.snp
#
# allele.tab.cur.snp[ 1 ]/sum( allele.tab.cur.snp )
# allele.tab.cur.snp[ 2 ]/sum( allele.tab.cur.snp )
# cat( "\n" )
#
# # now, I need to construct a data.frame with the indiv.id, allele and methyl.level:
# SNP.CpG.df <- data.frame( indiv.id = unique.indiv, fam.id = fam.id,
# cur.snp = paste0(
# as.character( haplin.subset.data$gen.data[[1]][,1] ),
# as.character( haplin.subset.data$gen.data[[1]][,2] ) ),
# cur.cpg = betas.df.subgroup.tmp$beta
# )
# SNP.CpG.df
#
# ggplot( SNP.CpG.df, aes( as.factor( cur.snp ), cur.cpg ) ) +
# geom_jitter( aes( colour = as.factor( cur.snp ) ) ) +
# scale_color_discrete( "allele" ) +
# xlab( paste( "SNP", cur.snp ) ) +
# ylab( paste( "CpG", cur.cpg ) )
# }
} else {
message( "No probe SNPs in our data!" )
}
} else {
# maybe there are SNPs not in the manifest but in our data?
cur.cpg.pos <- filter( cpg.all, TargetID == cur.cpg ) %>%
select( TargetID, MAPINFO, CHR, STRAND )
cur.range <- cur.cpg.pos$MAPINFO
if( cur.cpg.pos$STRAND == "F" ){
# forward strand
cur.range <- c( cur.range, cur.range + 50 )
} else {
# reverse strand
cur.range <- c( cur.range - 50, cur.range )
}
message( "No probe SNPS for ", cur.cpg )
}
}
##
## CURRENT CpG: cg01921066
## No probe SNPS for cg01921066
##
## CURRENT CpG: cg04350215
## No probe SNPS for cg04350215
# plotting only the gaps CpGs
library( ggExtra )
# gather all the data for those CpGs
chosen.gene <- unique( gaps.cpgs.all.df$gene )
bvals.plotting.list <- lapply( names( betas.df.subgroup.gene.list ), function(x){
cur.df <- betas.df.subgroup.gene.list[[ x ]]
out <- filter( cur.df, name == chosen.gene & CpG %in% gaps.cpgs.all.df$cpg )
return( cbind( out, cleft_type = x ) )
})
sapply( bvals.plotting.list, dim )
## [,1] [,2] [,3] [,4] [,5]
## [1,] 206 538 280 332 912
## [2,] 10 10 10 10 10
bvals.plotting.df <- do.call( bind_rows, bvals.plotting.list )
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
ggplot( bvals.plotting.df, aes( x= cleft_type, y = beta ) ) +
geom_jitter() +
facet_wrap( .~CpG )

# plotting to check the correlation with the SNP we're investigating
bvals.plotting.df$allele1 <- ""
bvals.plotting.df$allele2 <- ""
gaps.cpgs.genotypes.list <- list()
for( chosen.cleft in cleft.types ){
cat( "\n\nCLEFT TYPE: ", chosen.cleft, "\n")
haplin.data.Iowa <- genDataLoad( file.path( "..", ffdata.file.names[ chosen.cleft ] ) )
ids.final <- read.table( file = file.path( "..", file.names[[ chosen.cleft ]] ),
stringsAsFactors = FALSE, header = TRUE )
which.cur.snp <- match( chosen.gene, as.character( map.ok$gene_name ) )
haplin.data.cur.SNP <- genDataGetPart( haplin.data.Iowa, design = "triad",
markers = which.cur.snp, overwrite = TRUE )
haplin.data.cur.SNP
cur.cpg <- as.character( gaps.cpgs.all.df$cpg[1] ) # the individuals are the same, irrespective of CpG
betas.df.subgroup.tmp <- filter(
betas.df.subgroup.gene.list[[ chosen.cleft ]], as.character( CpG ) == cur.cpg )
which.indiv.cur.cpg <- betas.df.subgroup.tmp$indiv
unique.indiv <- unique( which.indiv.cur.cpg )
fam.id <- as.character( sapply( unique.indiv, function(x){
if( sum( ids.final$ID.BSI == x ) != 0 ){
ids.final$ID.N2[ ids.final$ID.BSI == x ]
}
} ) )
chosen.indiv <- haplin.data.cur.SNP$cov.data[ match( fam.id,
as.character( haplin.data.cur.SNP$cov.data[ ,"id" ] ) ), "id" ]
which.null <- which( chosen.indiv == "0" )
if( length( which.null ) != 0 ){
chosen.indiv <- chosen.indiv[ -which.null ]
}
file.subset.haplin <- "subset1"
haplin.subset.data <- genDataGetPart( data.in = haplin.data.cur.SNP, design = "triad",
id = chosen.indiv, file.out = file.subset.haplin, overwrite = TRUE )
gaps.cpgs.genotypes.list <- c( gaps.cpgs.genotypes.list, list( haplin.subset.data ) )
cat( " ALLELE TABLE:\n" )
allele.tab.cur.snp <- table( haplin.subset.data$gen.data[[1]][,] )
allele.tab.cur.snp
allele.tab.cur.snp[ 1 ]/sum( allele.tab.cur.snp )
allele.tab.cur.snp[ 2 ]/sum( allele.tab.cur.snp )
cat( "\n" )
# now, I need to construct a data.frame with the indiv.id, allele and methyl.level:
bvals.plotting.df$allele1[
with( bvals.plotting.df, indiv %in% which.indiv.cur.cpg )
] <- as.character( haplin.subset.data$gen.data[[1]][,1] )
bvals.plotting.df$allele2[
with( bvals.plotting.df, indiv %in% which.indiv.cur.cpg )
] <- as.character( haplin.subset.data$gen.data[[1]][,2] )
}
##
##
## CLEFT TYPE: CLO
## The output file(s) exist!
## Provided arguments:
## --- chosen markers: 2
## INFO: Will select 1217 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/Rtmpq6iF6H/ff4c423697037.ff
## Saving data...
## ... saved to files: ./my_data_part_gen.ffData, ./my_data_part_gen.RData
## The output file(s) exist!
## Provided arguments:
## --- argument: id, chosen values: 11, 23, 27, 47, 56, 70 ... 666, 668, 669, 674, 676, 678
## INFO: Will select 103 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc46d57b48.ff
## Saving data...
## ... saved to files: ./subset1_gen.ffData, ./subset1_gen.RData
## ALLELE TABLE:
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc42e940b2.ff
##
##
##
## CLEFT TYPE: CL/P
## The output file(s) exist!
## Provided arguments:
## --- chosen markers: 2
## INFO: Will select 3447 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/Rtmpq6iF6H/ff4c4180052a6.ff
## Saving data...
## ... saved to files: ./my_data_part_gen.ffData, ./my_data_part_gen.RData
## The output file(s) exist!
## Provided arguments:
## --- argument: id, chosen values: 1, 5, 8, 11, 13, 15 ... 675, 676, 678, 682, 683, 684
## INFO: Will select 269 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc41adf11ce.ff
## Saving data...
## ... saved to files: ./subset1_gen.ffData, ./subset1_gen.RData
## ALLELE TABLE:
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc41b343ea0.ff
##
##
##
## CLEFT TYPE: CPO
## The output file(s) exist!
## Provided arguments:
## --- chosen markers: 2
## INFO: Will select 1829 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/Rtmpq6iF6H/ff4c44a7ad35.ff
## Saving data...
## ... saved to files: ./my_data_part_gen.ffData, ./my_data_part_gen.RData
## The output file(s) exist!
## Provided arguments:
## --- argument: id, chosen values: 2, 3, 16, 24, 28, 30 ... 616, 626, 628, 630, 637, 687
## INFO: Will select 140 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc45e21c47.ff
## Saving data...
## ... saved to files: ./subset1_gen.ffData, ./subset1_gen.RData
## ALLELE TABLE:
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc49cd503b.ff
##
##
##
## CLEFT TYPE: CLP
## The output file(s) exist!
## Provided arguments:
## --- chosen markers: 2
## INFO: Will select 2230 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/Rtmpq6iF6H/ff4c44ce71911.ff
## Saving data...
## ... saved to files: ./my_data_part_gen.ffData, ./my_data_part_gen.RData
## The output file(s) exist!
## Provided arguments:
## --- argument: id, chosen values: 1, 5, 8, 13, 15, 19 ... 671, 672, 675, 682, 683, 684
## INFO: Will select 166 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc47ec250d3.ff
## Saving data...
## ... saved to files: ./subset1_gen.ffData, ./subset1_gen.RData
## ALLELE TABLE:
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc4a9df0a.ff
##
##
##
## CLEFT TYPE: cntrl
## The output file(s) exist!
## Provided arguments:
## --- chosen markers: 2
## INFO: Will select 11461 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/Rtmpq6iF6H/ff4c43ff22ad9.ff
## Saving data...
## ... saved to files: ./my_data_part_gen.ffData, ./my_data_part_gen.RData
## The output file(s) exist!
## Provided arguments:
## --- argument: id, chosen values: 50002, 50004, 50005, 50006, 50008, 50010 ... 51022, 51023, 51026, 51029, 51030, 51031
## INFO: Will select 456 rows and 6 columns.
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc4330a5d82.ff
## Saving data...
## ... saved to files: ./subset1_gen.ffData, ./subset1_gen.RData
## ALLELE TABLE:
## opening ff C:/Users/jro049/AppData/Local/Temp/RtmpcB0PXJ/ff1fc42cf068c9.ff
names( gaps.cpgs.genotypes.list ) <- cleft.types
table( bvals.plotting.df$allele1 )
##
## A G
## 1600 435
table( bvals.plotting.df$allele2 )
##
## A G
## 607 1428
bvals.plotting.df$alleles <- paste0( bvals.plotting.df$allele1, bvals.plotting.df$allele2 )
ggplot( bvals.plotting.df, aes( x= 1, y = beta, color = as.factor( alleles ) ) ) +
geom_jitter() +
facet_grid( cleft_type~CpG )
