import numpy def BWT(s): # create a table, with rows of all possible rotations of s rotation = [s[i:] + s[:i] for i in xrange(len(s))] # sort rows alphabetically rotation.sort() # return (last column of the table) return "".join([r[-1] for r in rotation]) def inverseBWT(s): # initialize table from s table = [c for c in s] # repeat length(s) - 1 times for j in xrange(len(s)-1): # sort rows of the table alphabetically table.sort() # insert s as the first column table = [s[i]+table[i] for i in xrange(len(s))] # return (row that ends with the 'EOF' character) return table[[r[-1] for r in table].index('$')] def FMindex(s): index = dict() for i, c in enumerate(s): if (c not in index): index[c] = numpy.zeros(len(s)+1, dtype="uint32") for key in index.iterkeys(): index[key][i+1] = index[key][i] + int(key == c) offset = 0 for key in sorted(index.iterkeys()): index[key] += offset offset = index[key][-1] return index def find(pattern, FM): L, U = 0, len(FM['$']) - 1 for a in reversed(pattern): L = FM[a][L] U = FM[a][U] return L, U def suffix(i, FMindex, bwt): result = '' j = i while True: c = bwt[j] j = FMindex[c][j] result = c + result if (i == j): break return result if __name__ == "__main__": text = "mississippi$" bwt = BWT(text) print bwt fm = FMindex(bwt) for key, value in sorted(fm.iteritems()): print key, value for i in xrange(len(text)): print "%2d %s" % (i, suffix(i, fm, bwt)) print find("iss", fm)