From last time.
import os
import pysam
%matplotlib inline
import matplotlib.pyplot as plot
import numpy
Read the BAM file and its index
bamfile = pysam.Samfile("FF0683F.bam", "rb")
if not os.path.exists("FF0683F.bam.bai"):
pysam.index("FF0683F.bam")
Next let's consider a gene with multiple exons, Zfp324 (Chr7, 13551187 - 13559585, +). There are relatively few reads covering this gene (<1000). I have also made an effort to highlight the subset of reads that include splices.
channel = [0 for i in xrange(50)]
plot.figure(figsize=(15, 5))
N = 0
S = 0
for read in bamfile.fetch('chr7', 13551187, 13559585):
if (read.aend - read.pos > len(read.seq)):
# look for an availble channel to plot read
for i, end in enumerate(channel):
if (read.pos > end+100):
channel[i] = read.aend-1
y = i
break
else:
# add a new channel if none is available
channel.append(read.aend-1)
y = len(channel)
plot.plot([read.pos, read.aend-1], [y,y], 'dg')
plot.plot([read.pos, read.aend-1], [y,y], ':g')
S += 1
N += 1
print "%d reads, %d with splices" % (N, S)
x = []
y = []
for column in bamfile.pileup('chr7', 13551187, 13559585):
x.append(column.pos)
n = 0
for read in column.pileups:
if (not read.is_del):
n += 1
y.append(n)
plot.plot(x, y, 'b')
plot.plot([x[0], x[-1]], [numpy.mean(y), numpy.mean(y)], ':r')
Next we consider a highly expressed gene, Peg3 (Chr7, 6656603 - 6683132, +)
channel = [0 for i in xrange(50)]
plot.figure(figsize=(15, 10))
N = 0
S = 0
for read in bamfile.fetch('chr7', 6656603, 6683132):
if (read.aend - read.pos > len(read.seq)):
# look for an availble channel to plot read
for i, end in enumerate(channel):
if (read.pos > end+100):
channel[i] = read.aend-1
y = i
break
else:
# add a new channel if none is available
channel.append(read.aend-1)
y = len(channel)
plot.plot([read.pos, read.aend-1], [y,y], 'dg')
plot.plot([read.pos, read.aend-1], [y,y], ':g')
S += 1
N += 1
print "%d reads, %d with splices" % (N, S)
x = []
y = []
for column in bamfile.pileup('chr7', 6656603, 6683132):
x.append(column.pos)
n = 0
for read in column.pileups:
if (not read.is_del):
n += 1
y.append(n)
plot.plot(x, y, 'b')
plot.plot([x[0], x[-1]], [numpy.mean(y), numpy.mean(y)], ':r')