-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathids2bed.R
57 lines (51 loc) · 3.63 KB
/
ids2bed.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
library(biomaRt)
library(tidyverse)
# Build BED6 files (chrom, start, end, gene ID, score, strand) with the
# coordinates of ERG target genes, split by whether the gene appears in
# CLIP/motif_genes.txt and by activated/repressed status.

# Query Ensembl for the coordinates of `gene_ids` and shape them as BED6.
#
# Args:
#   gene_ids: vector of Ensembl gene IDs.
#   mart:     biomaRt Mart object to query.
# Returns:
#   data.frame with columns chromosome_name, start_position, end_position,
#   ensembl_gene_id, score, strand (in that order). Zero rows when
#   `gene_ids` is empty.
fetch_gene_bed <- function(gene_ids, mart) {
  # getBM() stops with an error when `values` is empty, which happens as soon
  # as one of the intersect()/setdiff() results below has no members. Return
  # an empty table instead so the corresponding (empty) BED file is written.
  if (length(gene_ids) == 0) {
    warning("No gene IDs to look up; producing an empty BED table.",
            call. = FALSE)
    return(data.frame(
      chromosome_name = character(0),
      start_position = integer(0),
      end_position = integer(0),
      ensembl_gene_id = character(0),
      score = numeric(0),
      strand = character(0),
      stringsAsFactors = FALSE
    ))
  }

  genes <- getBM(
    filters = "ensembl_gene_id",
    attributes = c("chromosome_name", "start_position", "end_position",
                   "ensembl_gene_id", "strand"),
    values = gene_ids,
    mart = mart
  )

  # BED needs a score column between name and strand; it is unused here.
  genes <- add_column(genes, score = 0, .before = "strand")

  # Ensembl coordinates are 1-based and inclusive, while BED chromStart is
  # 0-based (chromEnd is exclusive, so end_position is already correct).
  # Subtract an integer literal so the column stays integer and write.table()
  # never renders it in scientific notation.
  genes$start_position <- genes$start_position - 1L

  # Ensembl encodes strand as 1 / -1; BED expects "+" / "-".
  genes$strand <- ifelse(genes$strand == -1, "-", "+")

  # NOTE(review): chromosome names are written exactly as Ensembl returns
  # them (no "chr" prefix) -- confirm that downstream tools expect this.
  genes
}

# Write `bed` to `path` as a headerless, unquoted, tab-separated file.
# Returns `bed` invisibly.
write_bed <- function(bed, path) {
  write.table(bed, path, col.names = FALSE, row.names = FALSE, quote = FALSE,
              sep = "\t")
  invisible(bed)
}

mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))

# ---- Genes listed in the motif file ----
df <- read.table("CLIP/motif_genes.txt", stringsAsFactors = FALSE)
ids <- df[-1, 1] # The file is read without a header, so row 1 is the column name
bed <- fetch_gene_bed(ids, mart)
write_bed(bed, "s2p/fimo.bed")

# Separate targets into ERG activated vs. repressed
activated_targets <- read.table("RWPE1/activated_target38.txt",
                                stringsAsFactors = FALSE)
activated_ids <- intersect(activated_targets$V1, bed$ensembl_gene_id)
activated_bed <- fetch_gene_bed(activated_ids, mart)
write_bed(activated_bed, "s2p/activated_fimo.bed")

repressed_targets <- read.table("RWPE1/repressed_target38.txt",
                                stringsAsFactors = FALSE)
repressed_ids <- intersect(repressed_targets$V1, bed$ensembl_gene_id)
repressed_bed <- fetch_gene_bed(repressed_ids, mart)
write_bed(repressed_bed, "s2p/repressed_fimo.bed")

# ---- Targets NOT present in the motif tables ----
# Each setdiff() below must run before the matching *_bed variable is
# overwritten, because it compares against the motif table built above.
targets <- read.table("RWPE1/target38.txt", stringsAsFactors = FALSE)
ids <- setdiff(targets$V1, bed$ensembl_gene_id)
bed <- fetch_gene_bed(ids, mart)
write_bed(bed, "s2p/nonfimo.bed")

activated_ids <- setdiff(activated_targets$V1, activated_bed$ensembl_gene_id)
activated_bed <- fetch_gene_bed(activated_ids, mart)
write_bed(activated_bed, "s2p/activated_nonfimo.bed")

repressed_ids <- setdiff(repressed_targets$V1, repressed_bed$ensembl_gene_id)
repressed_bed <- fetch_gene_bed(repressed_ids, mart)
write_bed(repressed_bed, "s2p/repressed_nonfimo.bed")