Longest Common Subsequence

Inputs
Two sequences v and w.
Outputs
The length of the longest common subsequence shared by both v and w, along with a table that enables the finding of one instance of this subsequence.
In [16]:
from numpy import *

def LCS(v, w):
    s = zeros((len(v)+1,len(w)+1), dtype="int32")
    b = zeros((len(v)+1,len(w)+1), dtype="int32")
    for i in xrange(1,len(v)+1):
        for j in xrange(1,len(w)+1):
            # find the best score
            if (v[i-1] == w[j-1]):
                s[i,j] = max(s[i-1,j], s[i,j-1], s[i-1,j-1] + 1)
            else:
                s[i,j] = max(s[i-1,j], s[i,j-1])
            # save the path
            if (s[i,j] == s[i,j-1]):
               b[i,j] = 1               # from up
            elif (s[i,j] == s[i-1,j]):
               b[i,j] = 2               # from above
            else:
               b[i,j] = 3               # along diagonal
    return (s[i,j], b)
In [17]:
def PrintLCS(b,v,w,i,j,result=[]):
    if ((i == 0) or (j == 0)):
        print
        print "w =", " ".join([t[2] for t in result])
        print "   ", " ".join([t[1] for t in result])
        print "v =", " ".join([t[0] for t in result])
        print
        print "LCS =",
        return
    if (b[i,j] == 3):
        result = [(v[i-1], "|", w[j-1])] + result
        PrintLCS(b,v,w,i-1,j-1,result)
        print v[i-1],
    else:
        if (b[i,j] == 2):
            result = [(v[i-1], " ", "_")] + result
            PrintLCS(b,v,w,i-1,j,result)
        else:
            result = [("_", " ", w[j-1])] + result
            PrintLCS(b,v,w,i,j-1,result)
In [18]:
# Example from the lecture
v = "ATGTTAT"
w = "ATCGTAC"
s, b = LCS(v, w)
print "score =", s
PrintLCS(b,v,w,len(v),len(w))
score = 5

w = A T C G T _ A _ C
    | |   | |   |    
v = A T _ G T T A T _

LCS = A T G T A

In [20]:
# A bigger Example
v = "agcgaaagcaggtcaaatatattcaatatggagagaataaaagaactgagagatctaatg"
w = "agcaaaagcagggtagataatcactcaatgagtgacatcgaagccatggcgtctcaaggc"
print len(v), len(w)
s, b = LCS(v, w)
print "score =", s
PrintLCS(b,v,w,len(v),len(w))
60 60
score = 44

w = a g c _ a a a a g c a g g g t _ a g a _ t a a t c a c t _ c a a t _ _ g _ a g _ t g a c a t _ _ _ _ c g a a g c c a t g _ g _ c g _ t c t c a a _ g g c
    | | |   | | |   | | | | |   |   |   |   | |   |   |   |   | | | |     |   | |     | |   | |           | | |   |     | |   |     |   | | |   | |   |    
v = a g c g a a a _ g c a g g _ t c a _ a a t a _ t _ a _ t t c a a t a t g g a g a _ g a _ a t a a a a _ g a a _ c _ _ t g a g a _ g a t c t _ a a t g _ _

LCS = a g c a a a g c a g g t a a t a t a t c a a t g a g g a a t g a a c t g g g t c t a a g

In []: