import gzip
def loadChromosome(filename):
    """Return the sequence stored in a gzip-compressed FASTA-style file.

    The first line of the file is treated as the header and discarded; every
    remaining line is concatenated, with line terminators removed, into one
    string.  No check is made that the first line really is a header, and
    any later header lines would be concatenated like sequence lines.

    filename -- path of the gzip-compressed file to read.

    Returns the concatenated sequence (an empty string when the file holds
    nothing but a header, or nothing at all).
    """
    # The context manager closes the handle even if read() raises.
    with gzip.open(filename, 'rb') as fp:
        raw = fp.read()
    # Binary mode yields bytes on Python 3; on Python 2 bytes is str and the
    # data is used as-is.
    if not isinstance(raw, str):
        raw = raw.decode('ascii')
    # splitlines() produces no trailing empty element, so the final sequence
    # line is kept whether or not the file ends with a newline.
    lines = raw.splitlines()
    return ''.join(lines[1:])
# Load the mitochondrial chromosome and append the '$' end-of-text sentinel.
seq = loadChromosome("HumChrMT.fa.gz")
seq = seq + '$'
# Single-argument print(...) produces the same output on Python 2 and 3.
print(len(seq))
# Show both ends of the sequence as a quick sanity check.
print(seq[0:30] + " ... " + seq[-31:])
16570 GATCACAGGTCTATCACCCTATTAACCACT ... CACGTTCCCCTTAAATAAGACATCACGATG$
def linesOf64(s):
    """Print s in consecutive rows of at most 64 characters.

    Every row except possibly the last holds exactly 64 characters; nothing
    is printed for an empty s.
    """
    for start in range(0, len(s), 64):
        # Slicing clamps at the end of s, so the last row may be shorter.
        print(s[start:start + 64])
# Dump the whole sequence, 64 characters per row.
print("Seq = ")
linesOf64(seq)
Seq = GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTC TGGGGGGTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATC TGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACAT ACTTACTAAAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAATGTCTGC ACAGCCACTTTCCACACAGACATCATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTC TGGCCACAGCACTTAAACACATCTCTGCCAAACCCCAAAAACAAAGAACCCTAACACCAGCCTA ACCAGATTTCAAATTTTATCTTTTGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACA TTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCCGCCCATCCTACCCA GCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCCAAAGACACCCCCCACA GTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCC ATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCAT CCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCA GCAATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTT AGCAATAAACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCA CCGCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCC TCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTACG AAAGTGGCTTTAACATATCTGAACACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCAC TATGCTTAGCCCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGAGCCA CAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATC GATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATACCGCCATCTTCAGCAAACCC TGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCCAT GAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTATGAAACTTA AGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAG CGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTA CGCATTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAA CCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCG CTCTGAGCTAAACCTAGCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCAT TTACCCAAATAAAGTATAGGCGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGG GAAAGATGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTTCTGCATAA TGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAAGACCCCCGAAACCAGACGAGCT 
ACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTA GAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTT TAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTCCAAAGAGGAACA GCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAGTAGGCC TAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAG TATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGAC AATTAACAGCCCAATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACA CAGGCATGCTCATAAGGAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTG TTTACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATG TTTAACGGCCGCGGTACCCTAACCGTGCAAAGGTAGCATAATCACTTGTTCCTTAAATAGGGAC CTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCTTACTTTTAACCAGTGAAATTGACCTGCC CGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATTTATTAATGC AAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGG GCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAGTCAAAGCGAA CTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGC AATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCC CGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTT CAGACCGGAGTAATCCAGGTCGGTTTCTATCTACNTTCAAATTCCTCCCTGTACGAAAGGACAA GAGAAATAAGGCCTACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATT ATACCCACACCCACCCAAGAACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACT TAAAACTTTACAGTCAGAGGTTCAATTCCTCTTCTTAACAACATACCCATGGCCAACCTCCTAC TCCTCATTGTACCCATTCTAATCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGG CTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACCCTTCGCT GACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCACATCTACCATCACCCTCTACA TCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCATACCCAA CCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTAC TCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCAG TAGCCCAAACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAG TGGCTCCTTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCA 
TGACCCTTGGCCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTTCGACC TTGCCGAAGGGGAGTCCGAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTCGC CCTATTCTTCATAGCCGAATACACAAACATTATTATAATAAACACCCTCACCACTACAATCTTC CTAGGAACAACATATGACGCACTCTCCCCTGAACTCTACACAACATATTTTGTCACCAAGACCC TACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTACGACCAACT CATACACCTCCTATGAAAAAACTTCCTACCACTCACCCTAGCATTACTTATATGATATGTCTCC ATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTAC TTTGATAGAGTAAATAATAGGAGCTTAAACCCCCTTATTTCTAGGACTATGAGAATCGAACCCA TCCCTGAGAATCCAAAATTCTCCGTGCCACCTATCACACCCCATCCTAAAGTAAGGTCAGCTAA ATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTATACCCTTCCCGTACTAATTAATCCCCT GGCCCAACCCGTCATCTACTCTACCATCTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCAC TGATTTTTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAA AAATAAACCCTCGTTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCAT AATCCTTCTAATAGCTATCCTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAATACT ACCAATCAATACTCATCATTAATAATCATAATAGCTATAGCAATAAAACTAGGAATAGCCCCCT TTCACTTCTGAGTCCCAGAGGTTACCCAAGGCACCCCTCTGACATCCGGCCTGCTTCTTCTCAC ATGACAAAAACTAGCCCCCATCTCAATCATATACCAAATCTCTCCCTCACTAAACGTAAGCCTT CTCCTCACTCTCTCAATCTTATCCATCATAGCAGGCAGTTGAGGTGGATTAAACCAAACCCAGC TACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAATAGCAGTTCTACCGTA CAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATCCTAACTACTACCGCATTCCTA CTACTCAACTTAAACTCCAGCACCACGACCCTACTACTATCTCGCACCTGAAACAAGCTAACAT GACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTT TTTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCATCATCCCCACCATC ATAGCCACCATCACCCTCCTTAACCTCTACTTCTACCTACGCCTAATCTACTCCACCTCAATCA CACTACTCCCCATATCTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCC ATTCCTCCCCACACTCATCGCCCTTACCACGCTACTCCTACCTATCTCCCCTTTTATACTAATA ATCTTATAGAAATTTAGGTTAAATACAGACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATA CTTAATTTCTGTAACAGCTAAGGACTGCAAAACCCCACTCTGCATCAACTGAACGCAAATCAGC CACTTTAATTAAGCTAAGCCCTTACTAGACCAATGGGACTTAAACCCACAAACACTTAGTTAAC AGCTAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCGCCGGGAAAAAAGGCGGGAG 
AAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCGGAG CTGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCAGCCATTTT ACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTGGAA CACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCG AGCCGAGCTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACA GCCCATGCATTTGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACT GACTAGTTCCCCTAATAATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAACATAAGCTT CTGACTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTGCTATAGTGGAGGCCGGAGCAGGA ACAGGTTGAACAGTCTACCCTCCCTTAGCAGGGAACTACTCCCACCCTGGAGCCTCCGTAGACC TAACCATCTTCTCCTTACACCTAGCAGGTGTCTCCTCTATCTTAGGGGCCATCAATTTCATCAC AACAATTATCAATATAAAACCCCCTGCCATAACCCAATACCAAACGCCCCTCTTCGTCTGATCC GTCCTAATCACAGCAGTCCTACTTCTCCTATCTCTCCCAGTCCTAGCTGCTGGCATCACTATAC TACTAACAGACCGCAACCTCAACACCACCTTCTTCGACCCCGCCGGAGGAGGAGACCCCATTCT ATACCAACACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCTTATCCTACCAGGCTTC GGAATAATCTCCCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGTA TGGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTAC AGTAGGAATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCC ACCGGCGTCAAAGTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGATCTGCTG CAGTGCTCTGAGCCCTAGGATTCATCTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATT AGCAAACTCATCACTAGACATCGTACTACACGACACGTACTACGTTGTAGCCCACTTCCACTAT GTCCTATCAATAGGAGCTGTATTTGCCATCATAGGAGGCTTCATTCACTGATTTCCCCTATTCT CAGGCTACACCCTAGACCAAACCTACGCCAAAATCCATTTCACTATCATATTCATCGGCGTAAA TCTAACTTTCTTCCCACAACACTTTCTCGGCCTATCCGGAATGCCCCGACGTTACTCGGACTAC CCCGATGCATACACCACATGAAACATCCTATCATCTGTAGGCTCATTCATTTCTCTAACAGCAG TAATATTAATAATTTTCATGATTTGAGAAGCCTTCGCTTCGAAGCGAAAAGTCCTAATAGTAGA AGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCCACCCTACCACACATTCGAAGAA CCCGTATACATAAAATCTAGACAAAAAAGGAAGGAATCGAACCCCCCAAAGCTGGTTTCAAGCC AACCCCATGGCCTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTTTGTCAA AGTTAAATTATAGGCTAAATCCTATATATCTTAATGGCACATGCAGCGCAAGTAGGTCTACAAG ACGCTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTT 
CCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACT AACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCC TCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTAC CATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATC TTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTG ACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTT GCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAACCAA ACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAA ACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGT ATTTACCCTATAGCACCCCCTCTACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTAAC CTTTTAAGTTAAAGATTAAGAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTA CCGTATGGCCCACCATAATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAAT ATTAAACACAAACTACCACCTACCTCCCTCACCAAAGCCCATAAAAATAAAAAATTATAACAAA CCCTGAGAACCAAAATGAACGAAAATCTGTTCGCTTCATTCATTGCCCCCACAATCCTAGGCCT ACCCGCCGCAGTACTGATCATTCTATTTCCCCCTCTATTGATCCCCACCTCCAAATATCTCATC AACAACCGACTAATCACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGATAACCA TACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCATTTTTATTGCCAC AACTAACCTCCTCGGACTCCTGCCTCACTCATTTACACCAACCACCCAACTATCTATAAACCTA GCCATGGCCATCCCCTTATGAGCGGGCACAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATG CCCTAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTATCCCCATACTAGTTATTATCGA AACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTACGCCTAACCGCTAACATTACTGCA GGCCACCTACTCATGCACCTAATTGGAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCT CTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAAT CCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACAACACATAATGACCCACC AATCACATGCCTATCATATAGTAAAACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCT CCTAATGACCTCCGGCCTAGCCATGTGATTTCACTTCCACTCCATAACGCTCCTCATACTAGGC CTACTAACCAACACACTAACCATATACCAATGATGGCGCGATGTAACACGAGAAAGCACATACC AAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTTATTACCTC AGAAGTTTTTTTCTTCGCAGGATTTTTCTGAGCCTTTTACCACTCCAGCCTAGCCCCTACCCCC CAATTAGGAGGGCACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAAGTCCCACTCC 
TAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCATAGTCTAATAGA AAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACC CTCCTACAAGCCTCAGAGTACTTCGAGTCTCCCTTCACCATTTCCGACGGCATCTACGGCTCAA CATTTTTTGTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGCTCAACTTTCCTCACTAT CTGCTTCATCCGCCAACTAATATTTCACTTTACATCCAAACATCACTTTGGCTTCGAAGCCGCC GCCTGATACTGGCATTTTGTAGATGTGGTTTGACTATTTCTGTATGTCTCCATCTATTGATGAG GGTCTTACTCTTTTAGTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCA AAAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCTAGCCTTACTACTAATA ATTATTACATTTTGACTACCACAACTCAACGGCTACATAGAAAAATCCACCCCTTACGAGTGCG GCTTCGACCCTATATCCCCCGCCCGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTAC CTTCTTATTATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAACAACT AACCTGCCACTAATAGTTATGTCATCCCTCTTATTAATCATCATCCTAGCCCTAAGTCTGGCCT ATGAGTGACTACAAAAAGGATTAGACTGAACCGAATTGGTATATAGTTTAAACAAAACGAATGA TTTCGACTCATTAAATTATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATA CTAGCATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCATATCCTCCCTAC TATGCCTAGAAGGAATAATACTATCGCTGTTCATTATAGCTACTCTCATAACCCTCAACACCCA CTCCCTCTTAGCCAATATTGTGCCTATTGCCATACTAGTCTTTGCCGCCTGCGAAGCAGCGGTG GGCCTAGCCCTACTAGTCTCAATCTCCAACACATATGGCCTAGACTACGTACATAACCTAAACC TACTCCAATGCTAAAACTAATCGTCCCAACAATTATATTACTACCACTGACATGACTTTCCAAA AAACACATAATTTGAATCAACACAACCACCCACAGCCTAATTATTAGCATCATCCCTCTACTAT TTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAAC AACCCCCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATCATGGCAAGCCAACGCCAC TTATCCAGTGAACCACTATCACGAAAAAAACTCTACCTCTCTATACTAATCTCCCTACAAATCT CCTTAATTATAACATTCACAGCCACAGAACTAATCATATTTTATATCTTCTTCGAAACCACACT TATCCCCACCTTGGCTATCATCACCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCACA TACTTCCTATTCTACACCCTAGTAGGCTCCCTTCCCCTACTCATCGCACTAATTTACACTCACA ACACCCTAGGCTCACTAAACATTCTACTACTCACTCTCACTGCCCAAGAACTATCAAACTCCTG AGCCAACAACTTAATATGACTAGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGA CTCCACTTATGACTCCCTAAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTACTTGCCG CAGTACTCTTAAAACTAGGCGGCTATGGTATAATACGCCTCACACTCATTCTCAACCCCCTGAC 
AAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATAATTATAACAAGCTCCATC TGCCTACGACAAACAGACCTAAAATCGCTCATTGCATACTCTTCAATCAGCCACATAGCCCTCG TAGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAGTCATTCTCATAATCGC CCACGGGCTTACATCCTCATTACTATTCTGCCTAGCAAACTCAAACTACGAACGCACTCACAGT CGCATCATAATCCTCTCTCAAGGACTTCAAACTCTACTCCCACTAATAGCTTTTTGATGACTTC TAGCAAGCCTCGCTAACCTCGCCTTACCCCCCACTATTAACCTACTGGGAGAACTCTCTGTGCT AGTAACCACGTTCTCCTGATCAAATATCACTCTCCTACTTACAGGACTCAACATACTAGTCACA GCCCTATACTCCCTCTACATATTTACCACAACACAATGGGGCTCACTCACCCACCACATTAACA ACATAAAACCCTCATTCACACGAGAAAACACCCTCATGTTCATACACCTATCCCCCATTCTCCT CCTATCCCTCAACCCCGACATCATTACCGGGTTTTCCTCTTGTAAATATAGTTTAACCAAAACA TCAGATTGTGAATCTGACAACAGAGGCTTACGACCCCTTATTTACCGAGAAAGCTCACAAGAAC TGCTAACTCATGCCCCCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATC CATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACACTA CTATAACCACCCTAACCCTGACTTCCCTAATTCCCCCCATCCTTACCACCCTCGTTAACCCTAA CAAAAAAAACTCATACCCCCATTATGTAAAATCCATTGTCGCATCCACCTTTATTATCAGTCTC TTCCCCACAACAATATTCATGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCA CAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATATTCATCCC TGTAGCATTGTTCGTTACATGGTCCATCATAGAATTCTCACTGTGATATATAAACTCAGACCCA AACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCG CTAACAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCAT CAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATCCTATACAACCGT ATCGGCGATATCGGTTTCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACC CACAACAAATAGCCCTTCTAAACGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGC AGCAGCAGGCAAATCAGCCCAATTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCC ACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAGCAGGAATCTTCTTACTCATCC GCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAACTCTAACACTATGCTTAGGCGCTAT CACCACTCTGTTCGCAGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTC TCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAACCACACCTAGCAT TCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTCCATCAT CCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTC 
ACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCT ACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTAT TACTCTCATCGCTACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTTCTCACCCTAACA GGTCAACCTCGCTTCCCCACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCA TTAAACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATTACTAACAACATTTCCCCCGC ATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACTCACAGCCCTCGCTGTCACTTTCCTA GGACTTCTAACAGCCCTAGACCTCAACTACCTAACCAACAAACTTAAAATAAAATCCCCACTAT GCACATTTTATTTCTCCAACATACTCGGATTCTACCCTAGCATCACACACCGCACAATCCCCTA TCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCTAACCTGACTAGAAAAG CTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAG GCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCAC ATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGT AACTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATC AACCCTGACCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCA CCACCCCATCATACTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAAC ACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTA GTATATCCAAAGACAACCATCATTCCCCCTAAATAAATTAAAAAAACTATTAAACCCATATAAC CTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAATACTAAACCCCC ATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACTCAACAGA AACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGACCAATGATATGAAAAACCATC GTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAACTAACCCCCTAATAAAATT AATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGC TCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTACT CACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTG AATCATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATC GGGCGAGGCCTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCC TGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTG AGGGGCCACAGTAATTACAAACTTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAA TGAATCTGAGGAGGCTACTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCA TCTTGCCCTTCATTATTGCAGCCCTAGCAACACTCCACCTCCTATTCTTGCACGAAACGGGATC 
AAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATCACCTTCCACCCTTACTACACAATC AAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAG ACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACATCAA GCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTAGGAGGC GTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAAC AACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCT CATTCTAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCA TCCGTACTATACTTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAA TACTCAAATGGGCCTGTCCTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAA ACCTTTTTCCAAGGACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAG ATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAGATTTGGGTACCACCCAAGTATT GACTCACCCATCAACAACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATATTGTAC GGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCAATCCACATCAAAACCCCCTCCC CATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCAACTGCAACTCCAAAGC CACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAGTACATAAAGC CATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAG ATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCT CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACT TCAGGGTCATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATG$
import numpy
import sys
# BWT.resolveTies calls itself once for every three further characters that
# tied suffixes share, so raise the interpreter's recursion limit to leave
# room for long repeated stretches.
sys.setrecursionlimit(100000)
class BWT:
    """Suffix array, Burrows-Wheeler transform and FM-index of a sequence.

    The sequence is treated as circular: every comparison wraps around its
    end, so what gets sorted are the rotations of the text.  Symbols are
    drawn from ALPHABET and are ordered as they appear there.  Sorting
    works on "trios": each position is encoded as the base-6 value of the
    three symbols starting there, positions are bucketed by that value, and
    buckets holding more than one position are refined recursively by
    looking three symbols further along.

    Attributes:
        mix3        -- uint8 array; mix3[i] is the trio code at position i.
        suffixArray -- int64 array of start positions in sorted order.
        maxlevel    -- largest character offset examined while breaking ties.
        verbose     -- when > 0, suffixSort prints the size of each
                       first-level bucket.
        BWT         -- cached transform string ('' until getBWT is called).
        FMindex     -- cached FM-index dict (None until getFMindex is called).
    """

    # Symbol order used for sorting; a symbol's index is its base-6 digit.
    ALPHABET = "$ACGNT"

    def __init__(self, seq, verbose=0):
        """Encode seq and sort its rotations.

        seq     -- one-dimensional integer array of character codes (for
                   example a uint8 view of the text); a plain str or bytes
                   object is also accepted and converted.
        verbose -- pass a value > 0 to print first-level bucket sizes.

        Raises ValueError if seq is empty, is not one-dimensional, or
        contains a symbol that is not in ALPHABET.
        """
        alphabet = self.ALPHABET
        if isinstance(seq, (bytes, str)):
            if not isinstance(seq, bytes):
                seq = seq.encode('ascii')
            seq = numpy.frombuffer(seq, dtype='uint8')
        symbols = numpy.asarray(seq)
        if symbols.ndim != 1 or symbols.size == 0:
            raise ValueError("seq must be a non-empty one-dimensional sequence")

        # Unknown symbols map to a sentinel so they are rejected instead of
        # being silently encoded as '$'.
        invalid = len(alphabet)
        cmap = numpy.full(256, invalid, dtype='uint8')
        for i, c in enumerate(alphabet):
            cmap[ord(c)] = i
        try:
            codes = cmap[symbols]
        except IndexError:
            raise ValueError("seq must hold integer character codes below 256")
        if (codes == invalid).any():
            raise ValueError("seq contains symbols outside %r" % alphabet)

        # Trio code of every position, wrapping around the end of the text.
        # The largest possible value is 36*5 + 6*5 + 5 = 215, so uint8 holds it.
        self.mix3 = (36 * codes
                     + 6 * numpy.roll(codes, -1)
                     + numpy.roll(codes, -2))
        self.maxlevel = 0
        self.verbose = verbose
        self.suffixSort()
        self.BWT = ''
        self.FMindex = None

    def trio(self, val):
        """Decode a trio code back into its three-symbol string."""
        alphabet = self.ALPHABET
        val = int(val)
        return (alphabet[(val // 36) % 6]
                + alphabet[(val // 6) % 6]
                + alphabet[val % 6])

    def resolveTies(self, ties, offset):
        """Order positions whose first `offset` symbols are identical.

        ties   -- integer array of start positions that compare equal so far.
        offset -- number of leading symbols already known to be equal.

        Returns a new array holding the same positions in sorted order.
        """
        n = len(self.mix3)
        if offset >= n:
            # Every symbol of the rotations has been compared: they are
            # identical (periodic text), so any order is valid.  Stopping
            # here keeps the recursion finite.
            return ties.copy()
        if offset > self.maxlevel:
            self.maxlevel = offset
        keys = self.mix3[(ties + offset) % n]
        # A stable sort keeps the incoming order inside each bucket.
        result = ties[numpy.argsort(keys, kind='mergesort')]
        start = 0
        for count in numpy.bincount(keys):
            if count == 0:
                continue
            end = start + count
            if count > 1:
                result[start:end] = self.resolveTies(result[start:end],
                                                     offset + 3)
            start = end
        return result

    def suffixSort(self):
        """Build self.suffixArray by bucketing on trios and refining ties."""
        order = numpy.argsort(self.mix3, kind='mergesort').astype('int64')
        self.suffixArray = order
        start = 0
        for code, count in enumerate(numpy.bincount(self.mix3)):
            if count == 0:
                continue
            end = start + count
            if self.verbose > 0:
                print("%s %d" % (self.trio(code), count))
            if count > 1:
                order[start:end] = self.resolveTies(order[start:end], 3)
            start = end

    def getBWT(self):
        """Return the Burrows-Wheeler transform string, computing it once."""
        if self.BWT == '':
            alphabet = self.ALPHABET
            # The leading symbol of a trio is its most significant base-6 digit.
            leading = self.mix3 // 36
            # Index -1 wraps to the last position, i.e. the cyclic
            # predecessor of position 0.
            preceding = leading[self.suffixArray - 1]
            self.BWT = ''.join([alphabet[code] for code in preceding.tolist()])
        return self.BWT

    def getFMindex(self):
        """Return the FM-index, computing it once.

        The result maps each symbol c of the transform to a uint32 array of
        length len(BWT) + 1 whose entry i is the number of symbols in the
        transform that sort before c plus the number of occurrences of c in
        BWT[:i].
        """
        if self.FMindex is None:
            bwt = self.getBWT()
            column = numpy.frombuffer(bwt.encode('ascii'), dtype='uint8')
            index = {}
            offset = 0
            for c in sorted(set(bwt)):
                ranks = numpy.zeros(len(bwt) + 1, dtype='uint32')
                ranks[1:] = numpy.cumsum(column == ord(c))
                ranks += offset
                # The next symbol's rows start where this symbol's rows end.
                offset = int(ranks[-1])
                index[c] = ranks
            # Publish only the finished index so a failure cannot cache a
            # half-built one.
            self.FMindex = index
        return self.FMindex

    def find(self, pattern):
        """Backward-search pattern and return the matching row range.

        Returns (L, U) as plain ints: rows L..U-1 of the suffix array start
        with pattern, and L == U means there is no occurrence.  A pattern
        containing a symbol that never occurs in the text yields (0, 0).
        """
        FM = self.getFMindex()
        L, U = 0, len(self.getBWT())
        for a in reversed(pattern):
            ranks = FM.get(a)
            if ranks is None:
                return 0, 0
            L, U = int(ranks[L]), int(ranks[U])
        return L, U

    def suffix(self, j):
        """Rebuild the rotation of the text that sorts at row j.

        Walks the transform backwards from row j until it returns to j,
        collecting one symbol per step.

        Raises IndexError if j is not a valid row number.
        """
        FM = self.getFMindex()
        bwt = self.getBWT()
        if not 0 <= j < len(bwt):
            raise IndexError("row %r is out of range" % (j,))
        collected = []
        i = j
        while True:
            c = bwt[i]
            i = int(FM[c][i])
            collected.append(c)
            if i == j:
                break
        # Symbols were gathered last-to-first.
        return ''.join(reversed(collected))
# View the text as an array of character codes.  numpy.frombuffer replaces
# the deprecated binary mode of numpy.fromstring; encoding first gives it a
# bytes buffer on Python 3 as well as Python 2.
nSeq = numpy.frombuffer(seq.encode('ascii'), dtype='uint8')
print(len(nSeq))
bwt = BWT(nSeq, verbose=1)
# Two spaces after '=' reproduce what the Python 2 statement
# `print "max recursion level = ", x` emitted.
print("max recursion level =  %s" % bwt.maxlevel)
16570 $GA 1 AAA 524 AAC 495 AAG 209 AAT 376 ACA 448 ACC 515 ACG 119 ACN 1 ACT 412 AGA 178 AGC 282 AGG 174 AGT 161 ATA 367 ATC 371 ATG 162 ATT 330 CAA 465 CAC 454 CAG 199 CAT 416 CCA 464 CCC 624 CCG 141 CCT 542 CGA 122 CGC 155 CGG 80 CGT 78 CNT 1 CTA 523 CTC 419 CTG 180 CTT 318 G$G 1 GAA 201 GAC 169 GAG 129 GAT 114 GCA 207 GCC 271 GCG 54 GCT 179 GGA 122 GGC 151 GGG 72 GGT 80 GTA 154 GTC 106 GTG 55 GTT 104 NTT 1 TAA 414 TAC 377 TAG 258 TAT 324 TCA 415 TCC 361 TCG 121 TCT 307 TG$ 1 TGA 190 TGC 123 TGG 99 TGT 100 TTA 329 TTC 308 TTG 116 TTT 251 max recursion level = 15
# Module-level alias for the suffix array built by the BWT instance.
suffixArray = bwt.suffixArray
def space(trips):
    """Return *trips* cut into groups of three characters joined by spaces.

    The last group is shorter when ``len(trips)`` is not a multiple of three;
    an empty input yields an empty string.
    """
    # Slicing already stops at the end of the string, so no explicit clamp
    # of the upper bound is needed.
    return ' '.join(trips[i:i + 3] for i in range(0, len(trips), 3))
prev = ''
for j in xrange(0,len(nSeq),100):
i = suffixArray[j]
if (i + 18) < len(seq):
print "%9d %9d %-24s %s" % (j, i, space(seq[i:i+18]), seq[i:i+18] >= prev)
prev = seq[i:i+18]
else:
print "%9d %9d %-24s %s" % (j, i, space(seq[i:]+seq[:18-len(seq)+i]), seq[i:]+seq[:18-len(seq)+i] >= prev)
prev = seq[i:]
0 16569 $GA TCA CAG GTC TAT CAC True 100 2131 AAA ACC TTG TAG AGA GAG True 200 14026 AAA CAA TTT CAC AGC ACC True 300 15900 AAA CTA ATA CAC CAG TCT True 400 1293 AAA GTA AGC GCA AGT ACC True 500 612 AAA TGT TTA GAC GGG CTC True 600 335 AAC ACA TCT CTG CCA AAC True 700 8122 AAC CAA ACC ACT TTC ACC True 800 3490 AAC CCG CCA CAT CTA CCA True 900 2563 AAC GGC CGC GGT ACC CTA True 1000 11335 AAC TTA ATA TGA CTA GCT True 1100 13342 AAG CCA TAC TAT TTA TGT True 1200 1739 AAG TAT AGG CGA TAG AAA True 1300 14448 AAT AGC CAT CGC TGT AGT True 1400 13151 AAT CCA AAC TCT AAC ACT True 1500 8168 AAT GCT CTG AAA TCT GTG True 1600 10072 AAT TTT AAT AAT CAA CAC True 1700 12216 ACA AGA ACT GCT AAC TCA True 1800 6769 ACA CCA TAT ATT TAC AGT True 1900 2373 ACA GCC CAA TAT CTA CAA True 2000 4841 ACA TCC GGC CTG CTT CTT True 2100 9369 ACC AAT GAT GGC GCG ATG True 2200 12667 ACC CAA ACA TTA ATC AGT True 2300 6418 ACC CCC TGC CAT AAC CCA True 2400 10397 ACC GAA TTG GTA TAT AGT True 2500 2837 ACC TCC GAG CAG TAC ATG True 2600 10167 ACG AGT GCG GCT TCG ACC True 2700 439 ACT AAC ACA TTA TTT TCC True 2800 9337 ACT AGG CCT ACT AAC CAA True 2900 14792 ACT CAT TCA TCG ACC TCC True 3000 9691 ACT GCT TAT TAC AAT TTT True 3100 7525 AGA AAA ACC ATT TCA TAA True 3200 15735 AGA CCT CCT CAT TCT AAC True 3300 511 AGC ACA CAC ACA CCG CTG True 3400 16317 AGC CAT TTA CCG TAC ATA True 3500 4410 AGC TAA ATA AGC TAT CGG True 3600 1197 AGG AGC CTG TTC TGT AAT True 3700 2643 AGG GTT CAG CTG TCT CTT True 3800 13257 AGT CAA CTA GGA CTC ATA True 3900 4778 ATA AAA CTA GGA ATA GCC True 4000 6095 ATA ATC TTC TTC ATA GTA True 4100 16138 ATA CTT GAC CAC CTG TAG True 4200 6774 ATA TAT TTA CAG TAG GAA True 4300 14937 ATC AAT CGC CCA CAT CAC True 4400 15790 ATC ATT GGA CAA GTA GCA True 4500 4688 ATC CTC TTC AAC AAT ATA True 4600 15019 ATC TGC CTC TTC CTA CAC True 4700 7313 ATG ATT TGA GAA GCC TTC True 4800 4975 ATT AAA CCA AAC CCA GCT True 4900 2405 ATT ATT ACC CTC ACT GTC True 5000 243 ATT GAA TGT CTG CAC AGC True 5100 
16042 ATT TGG GTA CCA CCC AAG True 5200 8675 CAA ACT AAC CTC AAA ACA True 5300 2211 CAA CAC CCA CTA CCT AAA True 5400 15110 CAA CTA TAG CAA CAG CCT True 5500 11357 CAA TAG CTT TTA TAG TAA True 5600 3154 CAC AAA GCG CCT TCC CCC True 5700 572 CAC AGT TTA TGT AGC TTA True 5800 14811 CAC CCC ATC CAA CAT CTC True 5900 15346 CAC GAA ACG GGA TCA AAC True 6000 5603 CAC TCT GCA TCA ACT GAA True 6100 13198 CAG CAG TCT GCG CCC TTA True 6200 16300 CAG TAC ATA GTA CAT AAA True 6300 12072 CAT ACA CCT ATC CCC CAT True 6400 6517 CAT CAC TAT ACT ACT AAC True 6500 16497 CAT CTG GTT CCT ACT TCA True 6600 7275 CAT TCA TTT CTC TAA CAG True 6700 15989 CCA AAG CTA AGA TTC TAA True 6800 12484 CCA CAA CAA TAT TCA TGT True 6900 11141 CCA CCT TGG CTA TCA TCA True 7000 16458 CCA TAA CAC TTG GGG GTA True 7100 15979 CCA TTA GCA CCC AAA GCT True 7200 11140 CCC ACC TTG GCT ATC ATC True 7300 14619 CCC CAC AAA CCC CAT TAC True 7400 10935 CCC CCT AAC AAC CCC CCT True 7500 8843 CCC CTT ATG AGC GGG CAC True 7600 13129 CCC TAG CAG AAA ATA GCC True 7700 1277 CCC TGA TGA AGG CTA CAA True 7800 9865 CCG CCA ACT AAT ATT TCA True 7900 4396 CCT AAA GTA AGG TCA GCT True 8000 10564 CCT AGA AGG AAT AAT ACT True 8100 10612 CCT CAA CAC CCA CTC CCT True 8200 590 CCT CCT CAA AGC AAT ACA True 8300 13976 CCT GCC CCT ACT CCT CCT True 8400 16393 CCT TGA CCA CCA TCC TCC True 8500 7214 CGA CGT TAC TCG GAC TAC True 8600 1536 CGC ATT TAT ATA GAG GAG True 8700 10584 CGC TGT TCA TTA TAG CTA True 8800 8996 CGT ACG CCT AAC CGC TAA True 8900 727 CTA AAT CAC CAC GAT CAA True 9000 3345 CTA ATC GCA ATG GCA TTC True 9100 7825 CTA CGC ATC CTT TAC ATA True 9200 3873 CTA GCA GAG ACC AAC CGA True 9300 13180 CTA TCA CCA CTC TGT TCG True 9400 10613 CTC AAC ACC CAC TCC CTC True 9500 9332 CTC ATA CTA GGC CTA CTA True 9600 15142 CTC CCG TGA GGC CAA ATA True 9700 11857 CTC GCC TTA CCC CCC ACT True 9800 1241 CTC TTG CTC AGC CTA TAT True 9900 11739 CTG CCT AGC AAA CTC AAA True 10000 3297 CTT AAC AAC ATA CCC ATG True 10100 13441 CTT CAA CCT CCC 
TCA CCA True 10200 13959 CTT CTT ACG AGC CAA AAC True 10300 10923 CTT TTC CTC CGA CCC CCT True 10400 6698 GAA CCA TTT GGA TAC ATA True 10500 1857 GAA TTA ACT AGA AAT AAC True 10600 2982 GAC CTC GAT GTT GGA TCA True 10700 2144 GAG AGT AAA AAA TTT AAC True 10800 1603 GAG TGT AGC TTA ACA CAA True 10900 13915 GAT TCT ACC CTA GCA TCA True 11000 2536 GCA CCG CCT GCC CAG TGA True 11100 13758 GCA TCC CCC TTC CAA ACA True 11200 5251 GCC CAA ATG GGC CAT TAT True 11300 9914 GCC GCC GCC TGA TAC TGG True 11400 2875 GCG AAC TAC TAT ACT CAA True 11500 4769 GCT ATA GCA ATA AAA CTA True 11600 11681 GCT TCA CCG GCG CAG TCA True 11700 6578 GGA GAC CCC ATT CTA TAC True 11800 16455 GGC CCA TAA CAC TTG GGG True 11900 8871 GGC TTT CGC TCT AAG ATT True 12000 8164 GGT CAA TGC TCT GAA ATC True 12100 15178 GTA ATT ACA AAC TTA CTA True 12200 6971 GTA TTA GCA AAC TCA TCA True 12300 7774 GTC TGA ACT ATC CTG CCC True 12400 170 GTT CAA TAT TAC AGG CGA True 12500 4779 TAA AAC TAG GAA TAG CCC True 12600 10443 TAA ATT ATG ATA ATC ATA True 12700 15749 TAA CCT GAA TCG GAG GAC True 12800 13273 TAA TAG TTA CAA TCG GCA True 12900 5054 TAC AAC CCT AAC ATA ACC True 13000 10793 TAC CAC TGA CAT GAC TTT True 13100 10166 TAC GAG TGC GGC TTC GAC True 13200 11237 TAC TCA TCG CAC TAA TTT True 13300 619 TAG ACG GGC TCA CAT CAC True 13400 10596 TAG CTA CTC TCA TAA CCC True 13500 8740 TAG TAT CCT TAA TCA TTT True 13600 4208 TAT ATG ATA TGT CTC CAT True 13700 3559 TAT GAA CCC CCC TCC CCA True 13800 16014 TAT TCT CTG TTC TTT CAT True 13900 13291 TCA ACC AAC CAC ACC TAG True 14000 14034 TCA CAG CAC CAA ATC TCC True 14100 13813 TCA CTT TCC TAG GAC TTC True 14200 7796 TCA TCC TAG TCC TCA TCG True 14300 13021 TCC ACC CCT GAC TCC CCT True 14400 6151 TCC CCT AAT AAT CGG TGC True 14500 9729 TCC TAC AAG CCT CAG AGT True 14600 6234 TCC TGC TCG CAT CTG CTA True 14700 7329 TCG CTT CGA AGC GAA AAG True 14800 5725 TCT ACT TCT CCC GCC GCC True 14900 12523 TCT CGA ACT GAC ACT GAG True 15000 11623 TCT TCA ATC AGC CAC ATA True 15100 
5033 TGA ATA ATA GCA GTT CTA True 15200 8695 TGA TAA CCA TAC ACA ACA True 15300 12234 TGC CCC CAT GTC TAA CAA True 15400 7884 TGG CCA CCA ATG GTA CTG True 15500 2625 TGT ATG AAT GGC TCC ACG True 15600 12026 TTA ACA ACA TAA AAC CCT True 15700 9508 TTA CCA CTC CAG CCT AGC True 15800 10226 TTA GTA GCT ATT ACC TTC True 15900 5542 TTC AAA GCC CTC AGT AAG True 16000 4590 TTC CAG TTC TAA CCA AAA True 16100 12943 TTC TAA ACG CTA ATC CAA True 16200 15013 TTC TTT ATC TGC CTC TTC True 16300 9936 TTG TAG ATG TGG TTT GAC True 16400 8757 TTT ATT GCC ACA ACT AAC True 16500 6089 TTT GTA ATA ATC TTC TTC True
# Fetch the Burrows-Wheeler transform string and print it 64 characters per line.
bwtString = bwt.getBWT()
print "BWT = "
linesOf64(bwtString)
BWT = GCTAGGTCCCCGAATAGACCGAATATTCCAATCGGTACAAAAAATAACCCACAGTTAGTGAATT AGGAATCGACGATACGTTAACGCAAACTGCCTTTTCAGCCCCTGATTATTTAAATCTATATAAT TGCTAAAAAAATGAATAAGCCGACAGTATCATTATGCACTCAGGTAACACTAAAATAAGTACCT CCACGACCATAATTTTAAACCAGCCCACAGGTCCTATTGCGCCGAGACAATTCACACTCCAATC TCACATTAAATCCGTGCCCCTTTTGCATGAATACCTTAGTACATTATCCACCAACAATCCAAAT ACTACCACAACACGCATGATACCCCCCTACCAATATGTCGCACCCCTCTCCTCCGTTCCAGCAA AGCCTTAACAACAGTACTATCATATGGTCGTACAAGACTAGTTGTCTTATGATACTATGCTATC CCCGATTCGCCTAACCCAAAAATTCTAGATACCCGACAAAAACTGGCACCGAATAAATTTTACA AAGGGATGAATCATTACCACTTCTCACACAAACATATGTTATAACCTCTACGGATCGACCCTCC ATCACCTAATCCCGTCTCCCAACAATGCTCCTACAACTCGCTCGGTTTACAATCCTATCATAGT TTCTGTTTGCGTGTGTATGCCTCGCCCGAATCCAAAACATCTTACTTACTACTAACCTTGAACA TTCTGGCTTCACAACCCTCTGTTAAGTCGCCTGTATACATTCTAAGCCCGTTAAAAAAAACAGA AATTATTCAAAGAGCCTCAACACAGCAATCTCAGCTGCTTCAGAGCTAACCCGCACCTCATCAT TAAAGAATAACCTCTCACCTCTCCTTCTTATTGATAAACCTCATTCAGTGAATCCGGGCCAGAA ATACTCACACCCTCCAACCAATATCGAGCAACTAACATTGGTAAACAAGTCGCGCCAATAAACA AACAACTCCTAACGAGAGAAGACTGGCCGACAGTCAACCACCTATCTAAATTCAACTTTCGTTC CCTCAGAGGCCCGAATAATTCGCCACCATCTAACGAAGATCAAACCATTATTGCCAGCAATCCC GAGCAACCCCTAAAAAGGAGCATGGACGCCTCTTGTGATGCAGAACAAATTTTACACTGGATGT TAAAGGCAAAACTCCAACCACAGTAAGGCGTAATGCAAATAAACCCCCACACCCGCGAACTAAA TATATACGTAAGGACAAATTAACTCAATTTACAAAATTGGGATTCTGTTTTAGTCGTCACTTGC GTCTTCAGTACCATACGACTCAAGCACCTTCTACTACAACAACTATTCCTACCAACCAACCTTA ATCATCCGCTTTCAAGTCCGTTCCAACTTCATAGACATCACAATTTCTTTGTCTTGTTTCATAA AATCACTAATCCCCACTTCTGGCTTTCTAAGTCTATATCACCCTCACTTAAGGAAAATGTCTCG TACCACTAATTCTCAACGACGTCAGAACCTCTTCCGCAAACCGTCAAATCTGCCTTTATGTCTT CTCCATTACAGCCAATTATCGATTTCACGCTAGCATATACACATCCCGTATATATCCAACTTGA TCATAACGAGTGTAAAACCTACGCTACAAGCCAATACCAATCTAAGTACACACACCCCGCCCGA AAACAGACCTGGCAATATTAAATAACGATCATCATCCTCGCATATGAACGAGAACCAATACTAC ACACGTATTAGACAGGAACCAATGTTTAATTAACTTATACCCACTCGACAAAAAAACATATGAC TTATCAATCCCAAACCGATCCTATAAAATCCCCCTCTACTAATTCACCCGCACACCCGAATTAC ATACCCTACTAGTATCCAAACCCCTAAGAAACTCCCCACAACCAACACCCAACAAAACACCTAC 
TCAAAACCACCAATTATTGCATCACCGTATTTTATCAGCCACAAAATCTTCCCTTAAAACTAGC CCTCTAGCAAGCGTTGGCATAACGCCAACGCACCAGCCCTAAATCCGAACAATCACAACAAGTG ACTTAATAAAAACGTTCCAACTTCTAAATAAACGGTGGCCGATAACTCCGCGTGCCATTTTCTG AACAGGCCAGCATACATTCGCCACCTACATAATCTATCACAAACATAACCCATATCAATCACTC TACATCCATATCTAACACAACAAAGGCTCATACCCCTCAAACAGTACCATGCCATTTCCACGTA AACACTATTTAAGAAACTATACCACCCAAACGCCATGTACTCTAGAAGTAGACACACTAAAAAA TTAATGTCGTAATCCGCACAACCGTCCATAACAACACTCCCGAGCCTCTCCTCCTGGCCAACCT CAATTCCCCCACACCAAAACCCCCCTAGTAATAACTTAGTCATTCCCACTCACAGCAGCCAGTT TCATAATGTTTGATTGGATCCAACAATCCAACCAAAGCCTTAACAAAACGAGCATTCCCTCGAT ATTAACGAGCCTGAGAAGCACCTATCCCTCAAATCTCATAAAGGAAGCGCACAGTCAATTCGCA ACTCTAAACAAACTTAGGTATCCTCATCCTCCGCTTGCGTTTCACACCTTTACAGCATTTCAGA ATCGTTTTGTAAACCACGGAGAACCTCTGATATCCTATGCTTACGATCGCGGTGGATCCGTATT ACACTTTCCTATAGTTTTCTAGAAAATACATTTAACCAGTGACATTTGATCGGTACTTATGATT GTTCAAGACAAAGTGTTTTTAACCATGTCTACTGGACTACTAGCAAACTTATTTTATTGATCCT TACGATACCTTTGATAATCTGCTCCCACCGCCGATATGTTAAATCGATCTTCCACCTCGCACTT ACTGAACATTCTCTACTAGTCCCTAACTATCCCTGACACTTCTGTGTCTCTAGTGTTATTATGT TGCCCTGTTAAATAACTTCTCTCACCTGTATTCGTAACAACATCTCCCGATTCCAACTGTCATA TCACCAAAACGAAAATATTAAACACTCCCCGTCCTTGGTCACCTTGATTTAAGTTGCCTTTTCC ATATCCCACACACCGCCACATACAACTGTGTTGACTCCTCGAGTTGATATTTAACAAGGTACAA ACCCAAGATTTGTGTGCTTGTACCGGTTATTACGCTATACTATGTCTACAGGAAACCCTTCATC CACCACACGTGTAATTTTACAACTGAGTACAAATTTATACGCCGTACATATCAATAACTATAGC CATACAAACTACGCTGCTTTTACTTATTTATACAGCCGGATCCCTATAGAATACTGTGATCACA GTTCTTTTCGGCCCTAATACTGGATTTATTTTTCTTTTGTGGATATAGTCGTCCTCACTCCCCA CTTTTTCCACTCCTTGTTACTCATATAATAGTAGTTGTCCCGCGACCTTATTTGTACCGATACC TACCTCTCAAATGAAAAGGTGCATATGAAACCAAAACGCAACGTCAATAACACAGCTATCTAGA TCAAGAACTGCTCTATAGGGTAGGCTCGTTTGAACAGTTTTACTAGCAAACTTTTACTAGATCA AAAGTCCTAATTGGATGTTTGTTGTTCATCTTATCCCGCACTCAGCAGCCGACTAATCGTGTTA TGTTGATCGTCTTTAGGTACGTTTTTTGACGCCTGACTTGTCGAGTCTCAGCGCTTCTTATACG TACCTCTGAGTACATGGACCCTGTACTGTATCTTACACGATCGCGATTCCATTCTCCGTTCGAC TCTGTAGTTTAATTGTCCTAAACCAATCACACTCGCAGGAACCGTTCATCTCTGCCACATCCCA 
CGGCCCAGGCGTAATAGAGACATTATGTTATTGGTCCCTCCCAGTTTAAACCCTAACCTCACCG AGACAACCCAAGATCATTACCAACCTCACCTTAACCCGGTTCTGACCACCCTCTAACACACCAT ATAACTTAACACCCCCACACGCCAGCCCCAAAACCCACCAAACACCATTACCTCACCAATGCCA TGTACTCTACCGCCCCTTTTCGTAAGAAGAATATTCAATTCTCATCCCACATATGCCCTCCGAC TTCCACAAGTTCACGCAATGATTTCTTCAGCAAAACAGACCAATAAGAAAAAACACTTCTAAGA TCATTCCTAAACCTAAATTTATCCTTCCTATACGTAGACTCCAGACGATTTACCTTGCAAACTT GAACCATATCGGAAAATTATACAGTACAATCCCAGACCCCGGACTAGCAACCTAATACTAAGAC CACCCCAATTATCATCCTCCTTAACGAAAAATCCCGCCTTCCCCAACAATTCGCCCTAACACAA ACGCGTACCACCATCTCCCCATTAATCACCCACCCATTTCACCCCCGTCGAAATCCATCAAGCA ACCATCTTTAAACCTGTATATTACCAAACACTCTTGCTCCCATCTCCTCCCGGCCTAATTCATC CACAACCAACCGCGTCTCAATTTCATCTCAATCCAAATCAAACCCCCTACCCATAAACTGCCTC ATCATACTCTACTCCTTACAGATTATCCCACCATAATAAATTACATTTACGTAAGCTCGTTCCC TAATAATAAAAACCCACAATCCACTGTTGCGTCTGTAATAGTGCATATCAAAAAGCCATCATTG CCTCCTCCGTGCTATGTAATACACACTACACGCTCTCTCAGCCGCACGAAATTCCACCCACATT AATCTCAGACCTTCAGTATCAAAACACATCTGTGTTTTTTCTCTTTACCTGAACGGACAAGCGT GTCATGTCCCATAGTCGCCAAAGACTCTATAAAAAACTGTCATAAATCTTTTCCCTTCGTTCCC TGTGTCCTTACAAAAATTATCAGAAATTCTCTAACACACACTCCCTTCCCCAAACCCATTCTGT CCTTAATCCAACGCCTTTCAACGTACTTTTCCCAGTGCCATTACCCTATCCAACCTTTCTTCTT AATGTAAGAATTATTTCTTCACCACACATACACTCTCCGTCAACAACATGCTTCCTTCGTTAGC TGTGTATTGTGATCGTCAACTGCTTGTATATGTCCATAGTATACGCCATCATACATCTCCACTC GGTGCACTACTAACCAACCTCTACTAGTCATGAATAAGCGCACCCCCTACAAAGCGCCAAGCCA CCCCACCAGTAACCCATATTTAGTGTCTCAAACCACTCCTAAACTCATTACCCTTGGATTCCCA ATCTCCTCAGGTCACGACCTTTATCCCAATTTACCTTAAACCATAACCCACTTTATCCAGATAG CTAAATCAATCATAATCTTTCACAAGTAATTATACAAATCTTCCATTAACAGTAATAAGTTTCC TAAGCTTCTCTAACCCCAACTTATGTCCACCGGTTGTTCTTACTTTTGAACCACCCGAAACACA GATTTTCAGTGTCCAGAATACGTGACCGTGGAGTCTTCAAAGGCCGTGCCTCTGTGTCAAATCA TTAAGTCTGTATCAAGACTCTCTTACAAACAGGTATATCGATATAAATCACCGCTCGCCACAAC CTTCCAAAGGTAAATCTTATTATAACACAACATTCCTTTCTACACACCCCATTGCAAATGCATA AGACTACTCAGTCTCAGAACAGCTTATTCAGTCCGCTCCTCCCAATCCATTGTTTCTGCCACCT TCTGCCTAAGGCACACACTCCAACCAGCTAGTACGCAAGCTTGAATTGCGACCCTACTCTAAGT 
TTTTATTACTACCCTCCTTGCTCGCCATCTAATAATTCTCCACATTACTGTCAGCCAATATCGC CACTTAATGTCAAATCTCTTTCCACCCAATAGCATATCTGACGCAACAATCACCTTACTTCACA GTTCCCTCCCTTGGACCAAAACCTAATGTTCGCCGCCTCCATACCTTAACCCCGCCACAGATAA CATGCTTCCACACAAACAAGATCCTACAGCATAACACCCCCTGTTGTTCCAACACTGTGGGCTT GAAACACTCTCTTATCGATCTTCCTATGCGATCGACAATTCCAATATAACGCACTTGGACTTCT AGTTCGTAAGCAACAGAGAAGAACATGCCTACCTGATATCATAGATAGGCATGCTAGAAGATAA CAGACATATTGGGTAATTGGGCGGGAAGGACTTACGAATTCAAACAGAAGGCCTGAAGAGGTGA GCCAAAGAAGCAACTGCACCCCGAGACGTCGTAAAACACGACCGCATCCATTGCACACTAACTG ATATTGCCGGCCTTCTTGAGCGAATTCGTTGCACACTGCCCCCTACCCGCAGTAAAATCTATTA TATATATACCCATCTCCCATTCATAATCTAGTAAAGCTCGGAGACTTCTAGTGTAGTCCATAAT GAAACGTAGCCCCCCCCTGTCACTAACCCATCTCGGCTGGCACTTCTTTTAGACTCTTTATCAC CTCGTATTTTCTTCCCATTAACCAAGTCTCCAAACGAATTCTCTTCAACACTAACTCGTCTGGT ACTCACCTTACAAGCCTCCCATAACCCTTAGCTAACGATACATCTATAAGCGATGCCTACTTTA ATCTATTTTTGCGGCGACTCATCTTTCACTCTTGATCACTGCGATTGCCGCCACTTCGGCATAA GTAATCACTCAGGACTTTCCCTCAGCAGAATAAATCTCTATTAGCCTCAGTATAAACAAAGACC AGCTAACTTAAAGCGCACGCGACCCCGCAATTCCCAAAATATTTCACCCTCCAAACGAAGCAAA AATTGAATACCCCCACCACAGTGAACATACGAACTCCGACGTCGCGAGCGCAGCTCCAAGAACT CGGGCACGCAGTAACAACACCTGTTACCGCTACCCTTGCAGGTCTTGAAATACTAATACATTCC CACCCGCACCCACCTCTAACACATCCCCAACCGTTCCGAGCGGACAAACAACTTGGAGCATCCG AACTAAAACCAATCTTTCGTCTACCAGTGTCATAACCTCCAACGCGCGCACATATCCACGGAAA CCTTAGTTGGAGACTGGCCCCTCACTACGCATCATGAAACTACAGTCCCCCGCCCGCACCAGCC GCCCCCGCAATTAGTAAAGCACCAGTACACACAGATACAGACCACCAAAGAAACACGACGAACA CTCAGACGAAACACGCAAGCACCACCTTACCCCACGACGTGACACCCTATATCCGGGCATAAAA CTCGGCCGGTCGCTCCAACTAACCTACGCCCGCGGGCCCACGCCACCCTCCACCAGCGACCTGG CTACATCTCTTTGTCATTTTCAATACACAAAACTATCTAAATCCCCTTACAACCACACTCGCCG CCACAAACGGAGATGTAAAAGATAAACAATGAGTATGTTTAAAATCACACCATATAAAAATAGA CAAGGCCAACCTCCCAAACCGCTTGCCACTAGAGTGTCCATTTCAACTGTCACCCCCCCCAAGT CCAACTCAATAATACCACGTCTCCCTGCACCCGCCACTCTTCCCAGCCATCCGCCCTCGCTAGG AAGTCCCGTCATCACAAAATGCCATTGTACCTCCAAGTCGACACAGGACAAAAGCTGAAACGAA ACCGTAAAGCGCATACATATCTACCGGAACACGCCCCCTACGAAAAAGGGGAAAATTACCTCTC 
GCTCTTCCGCACTCACAATGACACGCTTCTCCAAGGTCAAGTACGCCCTGCTACCCAGTTACCT ACGCACCAATGGCCACTCTGCTCAAACCTAGTCCTCCCCCAAACACGGGGCTACTATCCTACAC ACCGCGGAGCAGACCAAACATTTAGAATCTGTTCAGGGACTTAGAAGTATACCAAGCTTACAAT CCCTTCATCCATACAGACACAGATACTCTCGCACCCTCCCAGAGTCGCATAACCCCTTTCTTTG TTTGCCACCTGTATTTCTTCATCTGGATTAGACCAACTAGCGCTCCCAGACCTCTATGTCTGTT GCTCGTCGGCTCCTACCGACGCTAGCAGTGTGTTTTCTCCTTATCCTCTTATCCCCGGTCGTAC GTATGCCATTCGCCCCATAAGTTTCACTGATTACTACAACAGAACTTGATCTCGTCACCCGATC TGTCAGCCGCCTATGTGTCTCTTCATCTAAGCCACTCACCACTTTAAAATACCTATAAGTCCCA TTCATAAGACAGACTCGCAATGTCATAACCCCCCCCCCCGCACCAATCACATCATCTCTACCTG CTATAAAAGCCCCACCCCACAGCCCTGAAAGTGCACTAAACTGGATATTGGAGTGTCAGCCAAC GCCAATAGCGCGCCTACTTGTTCGCTTTCCGCCCAGCCGAACCTCCTCCGCATGCCAGCCACCG CCGCTAGGCTAGAGATCTTACTACTATTTAAACAAGCAGCTCGCAAATAGAACCTCTTAAAATG CATTCACAGCTTTGTTTCTTCTCCACTAGAACCAACAGCTAATACTAGGCTCCCCTGAACAGTC CCGAAATCACGAAATTACTTGGAGCACACACCCGTAACACAACAACACGGGGTCCCTTTTAATG CATTTACCCTGTCCTTTCCATGCCAACATCGATACCCTTTACCAACCTATAAGTTACCCCCAAT GAGAAACCGCACTTTTGACAGCCGCCCCCTCTGACCTCCAAGTCCTATTATCTTCATTTTTATC TTCCAACTCCCGATATACCTCTTCTCATTCTATACTCTTCTACTCAGATTTGCCAGAAAGGCTT AAACCCCAGGAGTTGGTTTAAGGCATACGACTCCTACACTGCTTCCCCCGTCGCATTCGCTGAC CTTACCCACCTTTCTAAAGATCAAACAATCGTAACACCAACGCGGCCCATAAACGCTACCACGA CGACTGTGCCCTACGTGACATCATTCCCAGAGTCACACGAGAACCCCGCATAGTCAAACGGTCC GGGATGGAGTAGCGCCACAAACACTTACAAAACCCAAAGCTACAAAACCAACCCTCCTAACTGC AATATCTATACCCGTAGCCTCGTTGAAACACAACGTTCCCTCTACCCTTGCATAATAGTCCCGT ATGAACGACAGACCCACATCAACCACTCACAAAGAACCTACAATATCTAAAATGCTCCTCTCAT AACGCCCTGGTCCCCGCTATCACACTCCCCTACCACCCATTTCCTCCCACCCTCCCACCTCCCC CCACCCCCTCCCAGACTACTACCAACCCACATCATCCCTGTCAAACCAAAACAATACTACGTAA CACAGCCGAGCCCACAACACGTTACCAGCAGGCACACAGAAGTCACCAACATCAACCGAAAACG TGAGCAGACAGGCTTCCCCAACCCCACAACACCTTCCAACCTATACAGCAACAACGCACAGCTC CCCACCATGTATCCCCTATTTCAAAGCCGCCACAGTCACCTAGTCACACCTAACAGCCGCGACT CTTTACATCCCATTGGCTTCATGACTAACCTTTACAGCATAACCACATAAAAACATCACAGCAG CACAAACCTTAATATTATAAGAAATATAATCATTCCCTGCTAACTATCCCCCCCACCCATTCAT 
CATTTCTCCTACACCCCCTGCCTCTCATCGGCCTACACCAAACTTCCACCCTCCCCCAAATGCA TACTTCTCATAAACCTTTACTAAGCTAACTTGAGAGCCCCCCACCCAATCCTCCCCATGGTACC CACCACCTACCCCCTCCTTTGACCAACGTAGCTATAATCCCACACGCGGCCATTATAATCGTCC AATCTCGCCCCCATGAACCTTAAACTACACCCGCCTCAATACCGCAACCGTCCCCGCATCTTTC TTCTTCTCCTATCCCCATATAAATCCAGTTCTCACGCACCCCCTTCCTCCCATCGATCATGATT GCCAGCGCCGAATCACTCCAAAGTGAGCCTGGATTTCCTCGACCTGAGCGCGGCTATTAAGGTT CCTCGTGAAACAGACCTCGCCAAACCCTCCTACGCGCCAAGTCCCCTGTGGTCACCCCACATTG TGCTCACTGGCATGTTTAATTGTTACGAATACTCCTACTGTCCCGATCATTGATATGAATTACG CAGTACTCTGGACGGCAGCGAATTTGACCACACTTGATGGAATCTGGTTGCACTCACCGATCAA CATAACATTCAAACTCCAGACGTACTTCCTTTGCGCACAACGTTCTCATTACCCATTCCCGCAA GCTAGATCATTATCTCATACCCTAAACAGCGCAATTTATATTTGAAAAACCCCTCGGTCCCTAC GGTCGGTGTAACATCTAATATAACCATCCTATGTTCCCCCAATCTTGAAACGGCCAAAGTATAT TTAATAAGTGGTAAAGCAGTAGTTCAACTCATAACCTCAAAATTCCTTCTAAACGCATTGTATC ATACAACAATGGGCCTACAGACGTATTAGATACCTAATAAGTGCCGCGGGTTCGTAAGTTTAGC CGTTTAGATACCATTCCGCTTGAAGTGATGTATATACGTGATGTAACGTACTTAGAATCAAGCA CACTTAAATCTATTACCTTCCACATATACCAAAAAACTAACCTCGGCCGGAAATTAAAAACAAA GACATTGTCGTGTGACAGATTTTTGGTTGTGCGTTTTGCTTAGTGGAGTGTTTACCCATAGCGT GTTGATGCCAGTAGGGCTAGGCGCCCTCATTCAAATTTCGGCGATTTCTTGTCGGTGAGAGGAG AGTTATTGACGAAGGAGGGATTCCTTAAAAAGCAATCGATGGCTGCAAAAATTTTCCGGTTGGG AATCGTCATCCTAATTTCTCTATCGTC$ATGTTGTTCCTTGTTTTTCAATTGGTCTTACAGATC CATCAAGCCTAATGGTGCTAGCCAAGATTAATTTGTATGTTCACTCAAAACTCGGAAGACGTTT GGACTGAATCACCAAAATCAAAACAACTTCCAAAGTGTGGAGTGATACAAAAAGAAGTCCTTAT CCTACTACCCAGATTAACAGAACAACAATTGGTATAAACCATCATCAAGAAACCACCAAATAGC CGCAAATGGTAAATAAGTAAAGGCCACCCAACGCCCAGTAAAGAGCAAGAACTTGACATAGATA AGACAACATACAGAAGACGAGAAACAGTATCAACATTGACTGTAATGTCTACGACAAAGAAAAA TGTAAATCGCCAAAATAGCGCTAAAAGAGTGGTAATATTATACTAAAGTTCGACTCAGCTAAAA AAATAAATCACCATACAGGCACCAATATTACAACTCACCTGAAAGCACGAGAAGCCACAGTAGA GGTTGTAAGAGACAGTGGATCCCATATATGAGGAAATCGCCTCGCCAGCAGACCCCACAACGAA AGCAACAGATGGGGTTTGGCGCAAGAGGGATAGGAGAAGGGTGAGCGCGAGGCGGTCGAGTAAA AACGCCACCTCCTAAATAAACACGGCGGCAACCCAGGCATCTTAATGCGACAAAGCGCAAAAGG 
ACGAGGGTCTTGCGAGCTCGATTCAAAATTCACAGCAGTACAACATACAAACACACACAGAAAT AGTAGGGTCTTATAGGATTTGCCGCAGTGCGGACTTTTGAGGGAAAGCAGGGATAAACTAAGAG CGCTAACAAAAAAACAAAATCTGAAATAGTACACCCAAAAACCAGACAGGAAAACATCCCTTAG CAATCAACTAAAACACAGTTGAGTAAGTCAATTTAGACTAAACAATTTAAAGATAAGTTTCAGA ACATCACTTTATTGTAAGTGTGTTGGAAATAAGGGCATAAAAAATCGACTTAATACGAAAAAGA GACTCCCTTTACAAATTCCGCATAAACGGAATCACTGAGCATATTCAGACAATAATTCATATAC CCAATGTATTCCGACCTCACGTGTCCAGATTGCATCCGCTAAAATTTTCGGGGATGGGTACGAT AAAAAAACTATCGCCTAAGTGTGCAATTAAAGCTCCAGTGGATGAAAAGTGTGTTTCCAAAACA AAAATAGTAACGTGGTCACATAGGTAGCAATTACCATATCCCTTATACAGATAACAAAGAAATA AATGACCAAAACCAACGGTATGAAATCTCCCCTAAGATAAACACTGATTTGATTATTCGATTAC TTAATAACCTATTAAACGCGAAGAAATACATGGTCGTCGATAACATATTGACTATGGGAGAGGG GCATAACGTACGGTCACATATAAGGCAAATCCGCAGACAACGAATATACTTGTATTGACTAGAG CAAAGTTATGTACGTCTAACGGCAATTACAAAACTAAATGCAGTCAATCGAGCCGTTAATTAAG TAGGGGAAAGTAGAGCATAGAGAAACCTTACGAACACTTGCAAATTACCAGAAGTTTCCTACGT TTTTGAACTTATAGCACAAATGGTAACCCGAATATAGCTAGAGGACGGAGGGGCTCGAACTTGC AGTAGAGCCTTCCTGACCACATTCCAATCACGATAGACAAAATAATCTTAACCTTCTCCTAAGC ACCAATCCGACTATATGGGTACAAGCTTCATCCTCTCGTCACATAGATCCCCGGATTGAGTCAC ACTTCCCCCACTATAACCTCGCTCACACGTCATGACCTTCTCGCGCCACTTCCAAAGTTCTACC AATAACCCCCCTCACCCCATACACCCTTCCCTTTTGATTTCCCGTTTCTATGCTATCAATCCAC CCTCTCGCCCCCGAGGTGCACCAACATGACAGTGCAGAAGCATTCCGACTCCGACCACAACCAC ACATAGACACACCCCCATAAACTAATCTCGATCAAAGCGAAACCCACCACAATACTCTTCATTC TTGACAATACATCTCACCCTCTTATACACCTACCCAGCACACCTCTCTTCTTAAACTCCGCTAT AACCCGTAACCCTTCATCAGTTTTTTTGAGTTGCCGAGCTCATTGTGACTTCAATAAAACATTC TCGTCTCTCCCGCTTCTTCCTATATCGGCAAAGACTACTACAACTTCCCGCATTTCTACTTCGC ATTCTCGCTCGCCCCATTCCTCTACACTCCATTAGCCCCTTTCCTCACCCCCACAGCATTCCAC GCCCCCCCTCATTCATCAATACCCAAGCTGATTCAAGATCACCCTTAACAAACAACCGCTGTAC CCTACCACTACCCCATCCCCCCCCCCCGACCCAACCTCATTACCTACGAATGCGTTGACTACAT ACACACATGCCCCCTAGTTTAACACCCACACTGAAACACACACGGCCCCGCCTAACCTGCACTG ACGTATGTGCTCACAATCCATAACCCTGATCTAGTCCTGTTCCCAGCTCTCGACCACCCGAAGC GCCACTCCAAAAACCATCACGCGAAAAGTTTCATGCGACACCCGCCGGCCCATAAACCATCAAC 
CTGGCCGCCCCATCACACGGTAACATAACCAATGTGTAAAGCTCAGAAAGAATACGTTGCCTCT CCACCCAATATACTCCTAAACACGAGTTTTCAAGTGGCAAGACATTTACCCTCGCTTCCAGCTC ACACCCTGTTTATACCAAACACTGCAGCTGATTCCTTTACACACTGATCCCCCTCATACCCCAC TAAATTCCCTTCAACCAGCCTTTCATCCGATTTACGCATTAACACCCATCCTCCATCTGACATC AATACAGTAATCTCCTCAACGGGCCCCAGAATCGTCTCTCCCAGATACCCCCATTTATTCTCAA CGCGGGTCTGATATTCCTTAAACTCTCCATCCTCCTCCTCCCCACCAGTCCTTCTAAAAAAGTG CCTCCCGAACAGAGTTCTGCATGACATTCCACAACACCTAATACTTGTCGGATAGCTTAAAAAT ACCCACCCTACAATTACGCCCAACCGATGGTACTCGCTAAACTCTCCTACTTATCGCATAAAGC AGAATTCCATCCATCCTGCCTCAAAGTTCTAACCACTAACACTGCACTTATGGCAACCACCATC GCAACACACCATCCCTATCCCCACGAAACCGAAGATAGATTTAATAAACAAAACTACTCCAATC CTCCACACTCCCCTTCTATAGACCTCTCAGCCGATAACCACCACGTTCAACCCTACCAAAGTAC TCACCACAAATCCCAAAAATATTAATCCAAACCTTCCAACCCACCAAAGTTACCCAACTATCCC CATTGAAACTTACCTTCTCATTCCAACCTGATGCCTCTCACAACGACTACCCTAAATGAATAAA ACGCCCCCCACTGAATACGTATCTATCTCAATCCCCCACCTTCTCCCCCAATTCCCCCACCGGA CGTACCCCCAAAAGTGCCTCGGCACCCCTCTCCGACTAAAACACACGCTATTTACTTTATATAT TTATATCTCATAACCACACTTCCGCTCCACCAACCCATCAACCACACACCCCCAGGGACTCTCC CAACATACACCACCACAAACCCGATAAACGGCACTTCTTACTAACCAGTCCCTGATGCACTACC GGTTGACTATAAGACCCATTACACCCCCCTTTCCTAGACCAATAGATTCACCCAACCCCATAAT TCCTCACTCCGTCACCCCATCCATACGCTGCACTTAATAACTAGTTGCATTTATTCTATATCAA CCACTTTGACGGCCCAATTAATTGTCCACAAAACTATAACAATCTTCCCCAACCAACAATATAC AATATAGGTAACTAACATTTACTCATGTCCGACCTTTTACCGTTTTCAGACTCCCACTCTGTCT CCAATTTCCATACATCAAACCTTCTCTTATTTATCAGTCGATCATCTATCAGCATTCTCTCAAG CTGCTAAATCTATAGCATATTGGCCACATCAACCGTACTCCTTTTCGTCATCAATTTTTTCGCC AACTACTGAGTCCAGACTCTCGATTAGCGCTGTCCATAGAAAATGGATTAATACACACCTATTA CTTGCACGACTAACAATTCTTTCACACATTTTCACACTTTTCCTTACATATATGACTGCGCTCG TCTCAAGAGCCTAAAAATAAACCACTAATGCAGGACTTTGAACCACCCCCCGGTCTCAACAATC CGAATAAACCTCTCTGCAACCATTTAACAACATATACTAAGTCACGCTTTCACTCCCCAAACTC AACGGCCACCCACCCAACAAGCATACCAGCCCAAACTGGAAAAACACCTTCCCTCGAACTTTAC GCGTAAGCAACCCAGTCGATTTAACCAATCAATGATGAATCCCATCCAACATGCCGCTCTCTCT CAATAAGCACACCATTTATTTCAGCAGCACCCCCTCCGCACCAGAGCTATGCCAGCCAAAAGCA 
GCCCCCACTTTTTTGCCAGCCTTAAGGTGAAACTCACCACTTACACCACAGCATATCATTAGAC TTGAGTACCCAGAAATACTTCCCAAATTCTATATCTATCCCGTATCTTACTTCCTCTTGTTATC TTGGATTCCTCCCCCTTACTCTCACTCAACAATCAAAGCAACCTGCAATCACAGTGACATACCC TCACTCCCAAACAGAGCAAACCCCTACCACACATCAGTAAACGCGGATACCCTAAGCTTTTAAT AGACTAAAATTTAATTTGCAAGACTTAACAACCACAACATAGCCACTCCACAGAACCATCCTCC ATATTTTTATCGATTTTCCCTACTTTGAACCTGTATCTGATATACCTATCTCCTAACACACACC CCACATCAAGACAATATCGCACTAAATTATAACTCACCCCCCCATTCACCACGATCCTGCGCCC CAAAAAAAGTACCAAATTTATCATTACCACCCCCATCAAATGTTATCTCCCCAAATGTCTCAAT AATGGCATACATATATACACTAAATCCCCCCNTCCAGCGTCTGAATGCAGCCGGACCCATTACC CTGTCTTCCTCATTTTAGCGACCTGCCCCTTACTAAGCATTTTAGCCCAATCATCTGTCCCCCG AACCGCCCAATCAATTAGCCTTGACCCTAATACCCCAGAACTCCTCTTTAAATCATAAGTCCAC AACCCCCCTACCGACTCGACTCGCCACTTTACTTGGCCGCACAAAATAGCAACTAACACTACCA TTAATCACAAAATTCTCCAACCAAGCATCTATCACCTACACACTGTCACCCCAGCCGACTCCCC TCAATACAAGTAATGGTATTATGGCCCCACTGTAATGTAATTAGTCTGTCCATAAATTTACACC CTACCAAATACTGTAACCTCAATAGCTATGTATGGCCCATAACCTGTGCGCAGTTGAAATTCAG ATCATCGTTCGAGACATTCCAAACACACCGAAGTTAATATAATTCGCGTCGTCCAAAATTATAT AGAAGACTGAAACTACTTTAAAGTACCTAACACAATAGCTGCCTCAAAGATATCACACTTTAAC TAATATGAGCCAACCAATAACTCCCGGATTGACATCCACATACCACTCAAGTATTCTGGCCTCC ACCCAATAGCCCAACTATGCCATACCGATAATTGATTCAATATTACCAATCCTAATAG
# Backward-search the index for 'CATCAT'; lo and hi bound the matching rows
# (hi is exclusive, as used by the xrange(lo, hi) loop that follows).
lo, hi = bwt.find('CATCAT')
print lo, hi
6407 6429
for i in xrange(lo,hi):
print bwt.suffix(i)[:60]
CATCATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAA CATCATAATCCTCTCTCAAGGACTTCAAACTCTACTCCCACTAATAGCTTTTTGATGACT CATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAATAATCGGTGCCCCCGATAT CATCATACTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACAC CATCATAGAATTCTCACTGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAA CATCATAGCAGGCAGTTGAGGTGGATTAAACCAAACCCAGCTACGCAAAATCTTAGCATA CATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACCTACGCCTAATCTACTCCAC CATCATAGGAGGCTTCATTCACTGATTTCCCCTATTCTCAGGCTACACCCTAGACCAAAC CATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCC CATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAAC CATCATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACCTACGCCT CATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAAC CATCATCCTAGCCCTAAGTCTGGCCTATGAGTGACTACAAAAAGGATTAGACTGAACCGA CATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGT CATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCAT CATCATGACCCTTGGCCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCC CATCATTAATAATCATAATAGCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCT CATCATTACCGGGTTTTCCTCTTGTAAATATAGTTTAACCAAAACATCAGATTGTGAATC CATCATTATTCTCGCACGGACTACAACCACGACCAATGATATGAAAAACCATCGTTGTAT CATCATTCCCCCTAAATAAATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATT CATCATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCCTTATCAC CATCATTGGACAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAAC