# Load the table of SNPs to annotate (first line of the file holds the column
# names; text columns stay as character) and display it for inspection.
snps.RT <- read.table(
  file = "SNPs_from_RT.dat",
  header = TRUE,
  stringsAsFactors = FALSE
)
snps.RT
## SNP locus RR X95CI p.val
## 1 rs742071 PAX7 1.03 0.9-1.19 6.80e-01
## 2 rs560426 ABCA4-ARHGAP24 0.94 0.82-1.08 3.90e-01
## 3 rs642961 IRF6 0.92 0.77-1.09 3.30e-01
## 4 rs7590268 THADA 1.03 0.87-1.21 7.40e-01
## 5 rs12543318 8q21.3 0.98 0.85-1.13 8.00e-01
## 6 rs987525 8q24 0.87 0.73-1.04 1.20e-01
## 7 rs3758249 FOXE1 0.87 0.75-1.00 5.00e-02
## 8 rs7078160 KIAA1598-VAX1 1.04 0.87-1.24 6.80e-01
## 9 rs8001641 SPRY2 1.08 0.94-1.24 3.00e-01
## 10 rs1873147 TPM1 0.90 0.76-1.05 1.80e-01
## 11 rs227731 NOG1 0.74 0.64-0.85 3.83e-05
## 12 rs13041247 MAFB 0.94 0.81-1.08 3.80e-01
library( biomaRt )
library( dplyr )
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:biomaRt':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Show which BioMarts Ensembl currently offers.
listEnsembl()
## biomart version
## 1 ensembl Ensembl Genes 96
## 2 ENSEMBL_MART_MOUSE Mouse strains 96
## 3 snp Ensembl Variation 96
## 4 regulation Ensembl Regulation 96
# List every dataset in the "snp" mart, then keep only the rows whose dataset
# name contains "hsapiens".
all.datasets <- listDatasets( mart = useEnsembl( biomart = "snp" ) )
dplyr::filter( all.datasets, grepl( pattern = "hsapiens", x = dataset ) )
## dataset
## 1 hsapiens_snp
## 2 hsapiens_snp_som
## 3 hsapiens_structvar
## 4 hsapiens_structvar_som
## description
## 1 Human Short Variants (SNPs and indels excluding flagged variants) (GRCh38.p12)
## 2 Human Somatic Short Variants (SNPs and indels excluding flagged variants) (GRCh38.p12)
## 3 Human Structural Variants (GRCh38.p12)
## 4 Human Somatic Structural Variants (GRCh38.p12)
## version
## 1 GRCh38.p12
## 2 GRCh38.p12
## 3 GRCh38.p12
## 4 GRCh38.p12
# Connect to the human short-variant dataset. Note that GRCh = 37 is requested
# here, so the coordinates returned below are on GRCh37, not on the GRCh38.p12
# build reported by the dataset listing above.
ensembl <- useEnsembl( "snp", dataset = "hsapiens_snp", GRCh = 37 )

# Annotation columns requested for every SNP.
my.attrib <- c(
  "refsnp_id", "chr_name", "chrom_start", "chrom_end", "allele",
  "ensembl_gene_stable_id", "ensembl_transcript_stable_id",
  "distance_to_transcript", "reg_feature_stable_id"
)
# Filter through which the SNP ID is passed to the mart.
my.filters <- "snp_filter"

# Query each SNP on its own so that an ID with no hits can still be represented
# by a placeholder row. Every list element is a data frame with the columns
# "input.snp" followed by my.attrib.
snps.RT.info.found <- lapply( snps.RT$SNP, function( x ){
  out <- getBM(
    attributes = my.attrib,
    filters = my.filters,
    values = x,
    mart = ensembl
  )
  if( nrow( out ) == 0 ){
    # Placeholder row: keep the queried ID and fill the attributes with
    # logical NA. Logical NA takes on the type of whatever it is later
    # row-bound with, so numeric columns (positions, distances) stay numeric
    # instead of being dragged to character by an all-character row.
    out1 <- data.frame( input.snp = x, stringsAsFactors = FALSE )
    out1[ my.attrib ] <- NA
  } else {
    # stringsAsFactors is set explicitly so that input.snp is character
    # regardless of the R version's default.
    out1 <- cbind( data.frame( input.snp = x, stringsAsFactors = FALSE ), out )
  }
  out1
} )

# Sanity check: number of rows (1st line) and columns (2nd line) per SNP.
vapply( snps.RT.info.found, dim, integer( 2 ) )
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 3 2 1 7 1 1 1 1 1 1 1 1
## [2,] 10 10 10 10 10 10 10 10 10 10 10 10
# Stack the per-SNP data frames into one table and preview its first rows.
snps.RT.info.found.df <- do.call( what = rbind, args = snps.RT.info.found )
head( snps.RT.info.found.df, n = 6L )
## input.snp refsnp_id chr_name chrom_start chrom_end allele
## 1 rs742071 rs742071 1 18979874 18979874 G/T
## 2 rs742071 rs742071 1 18979874 18979874 G/T
## 3 rs742071 rs742071 1 18979874 18979874 G/T
## 4 rs560426 rs560426 1 94553438 94553438 C/T
## 5 rs560426 rs560426 1 94553438 94553438 C/T
## 6 rs642961 rs642961 1 209989270 209989270 A/G
## ensembl_gene_stable_id ensembl_transcript_stable_id
## 1 ENSG00000009709 ENST00000375375
## 2 ENSG00000009709 ENST00000420770
## 3 ENSG00000009709 ENST00000400661
## 4 ENSG00000198691 ENST00000370225
## 5 ENSG00000198691 ENST00000535735
## 6 <NA> <NA>
## distance_to_transcript reg_feature_stable_id
## 1 22374 <NA>
## 2 21859 <NA>
## 3 21856 <NA>
## 4 33250 <NA>
## 5 33243 <NA>
## 6 NA ENSR00000958665
# Write the combined annotation table to disk under its current object name.
save(
  list = "snps.RT.info.found.df",
  file = "SNPs_from_RT_biomart_info.RData"
)