-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path5.coverage.py
79 lines (71 loc) · 2.11 KB
/
5.coverage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import re
import pysam
from sys import argv # argv[1]: prefix, argv[2]: PATH of BAM file
from os import path
linechunk = []
prev_pos = -1
prev_chrom = ""
depth_dic = {}
def process_chunk(linechunk):
for chrom, pos, cigar in linechunk:
cur_pos = pos
pos_back = 0
leftflag = True
for s, n in cigar:
if not leftflag or (s != 4 and s != 5):
for pos_diff in range(n):
if (s != 1 and s != 4 and s != 5):
if (cur_pos-pos_back) in depth_dic:
depth_dic[cur_pos-pos_back] += 1
else:
depth_dic[cur_pos-pos_back] = 1
if s == 1:
pos_back += 1
cur_pos += 1
leftflag = False
prefix = ""
if len(argv) > 2:
prefix = argv[1]
fn = argv[2]
else:
fn = argv[1]
f = pysam.Samfile(fn, "rb")
depth_dic = {}
for cnt, ar in enumerate(f):
if ar.tid == -1:
continue
chrom = f.getrname(ar.tid)
if prev_chrom != chrom:
if prev_chrom != "":
fo.close()
print ("Processing " + chrom + "...")
prev_chrom = chrom
if prefix == "":
fo = open(chrom+"_depth.txt", "w")
else:
fo = open(prefix+"_"+chrom+"_depth.txt", "w")
pos = ar.pos+1
cigar = ar.cigar
if pos != prev_pos:
if prev_pos != -1:
process_chunk(linechunk)
for i in range(prev_pos, pos):
try:
fo.write('%s:%d\t%d\n'%(chrom, i, depth_dic[i]))
del depth_dic[i]
except KeyError:
fo.write('%s:%d\t%d\n'%(chrom, i, 0))
prev_pos = pos
linechunk = [ (chrom, pos, cigar) ]
else:
linechunk.append( (chrom, pos, cigar) )
if cnt % 100000 == 0:
print (pos)
process_chunk(linechunk)
for i in range(pos, pos+150):
try:
fo.write('%s:%d\t%d\n'%(chrom, i, depth_dic[i]))
except KeyError:
fo.write('%s:%d\t%d\n'%(chrom, i, 0))
f.close()
fo.close()