{
 "metadata": {
  "name": "",
  "signature": "sha256:ffb19fa8ab45f2c45e49e033887f34a27082d3aa634b77f1cdff5b3ee132104d"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Motif Finding"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# nucleotide letter -> index into a 4-entry count list, and the inverse mapping\n",
      "nucleotideMap = { 'a':0, 'c':1, 'g':2, 't':3 }\n",
      "reverseMap = { 0:'a', 1:'c', 2:'g', 3:'t' }"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# number of Score() calls made since the counter was last cleared\n",
      "ScoreCount = 0\n",
      "\n",
      "def getAndClearScoreCount():\n",
      "    \"\"\" return the current value of the global ScoreCount\n",
      "        and reset it to zero \"\"\"\n",
      "    global ScoreCount\n",
      "    rval = ScoreCount\n",
      "    ScoreCount = 0\n",
      "    return rval"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def _columnCounts(s, DNA, l):\n",
      "    \"\"\" yield one [a, c, g, t] count list for each of the l\n",
      "        columns of the alignment described by s.\n",
      "        s = list of starting indices, 1-based, 0 means ignore\n",
      "        DNA = list of nucleotide strings\n",
      "        l = Target Motif length \"\"\"\n",
      "    # range (not xrange) so the cell runs under both Python 2 and Python 3\n",
      "    for i in range(l):\n",
      "        # loop over string positions\n",
      "        cnt = [0] * 4\n",
      "        for j, sval in enumerate(s):\n",
      "            # loop over DNA strands; an offset of 0 excludes the strand\n",
      "            if (sval != 0):\n",
      "                cnt[nucleotideMap[DNA[j][sval-1+i]]] += 1\n",
      "        yield cnt\n",
      "\n",
      "\n",
      "def Score(s, DNA, l):\n",
      "    \"\"\" compute the consensus SCORE of a given l-base\n",
      "        alignment given offsets into each DNA string.\n",
      "        The score is the sum, over the l columns, of the count\n",
      "        of the most frequent nucleotide in that column.\n",
      "        Every call increments the global ScoreCount by one.\n",
      "        s = list of starting indices, 1-based, 0 means ignore\n",
      "        DNA = list of nucleotide strings\n",
      "        l = Target Motif length \"\"\"\n",
      "    global ScoreCount\n",
      "    ScoreCount += 1\n",
      "    return sum(max(cnt) for cnt in _columnCounts(s, DNA, l))\n",
      "\n",
      "\n",
      "def Consensus(s, DNA, l):\n",
      "    \"\"\" compute the consensus STRING of a given l-base\n",
      "        alignment given offsets into each DNA string.\n",
      "        Each character is the most frequent nucleotide of its\n",
      "        column; ties go to the first of a, c, g, t.\n",
      "        s = list of starting indices, 1-based, 0 means ignore\n",
      "        DNA = list of nucleotide strings\n",
      "        l = Target Motif length \"\"\"\n",
      "    return ''.join(reverseMap[cnt.index(max(cnt))]\n",
      "                   for cnt in _columnCounts(s, DNA, l))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def NextLeaf(a, L, k):\n",
      "    \"\"\" advance the list of L integers, a, to the next leaf,\n",
      "        with the last position changing fastest.\n",
      "        Repeated calls generate all k^L combinations when\n",
      "        initialized with L*[1], then wrap around to L*[1].\n",
      "        a is modified in place and also returned. \"\"\"\n",
      "    for i in reversed(range(L)):\n",
      "        if (a[i] < k):\n",
      "            a[i] += 1\n",
      "            break\n",
      "        else:\n",
      "            # position i is exhausted: reset it and carry to the left\n",
      "            a[i] = 1\n",
      "    return a\n",
      "\n",
      "\n",
      "def NextVertex(a, i, L, k):\n",
      "    \"\"\" generates all nodes in a\n",
      "        search tree with k^L leaves.\n",
      "        a = list of L integers, i = depth of the current vertex.\n",
      "        Returns the tuple (a, depth) for the next vertex visited\n",
      "        depth-first, or (a, 0) once no vertex is left.\n",
      "        a is modified in place. \"\"\"\n",
      "    if (i < L):\n",
      "        # interior vertex: descend to its first child\n",
      "        a[i] = 1\n",
      "        return (a, i+1)\n",
      "    else:\n",
      "        # leaf: move to the next sibling, backing up past exhausted levels\n",
      "        for j in reversed(range(L)):\n",
      "            if (a[j] < k):\n",
      "                a[j] += 1\n",
      "                return (a, j+1)\n",
      "            a[j] = 0\n",
      "    return (a, 0)\n",
      "\n",
      "\n",
      "def Bypass(a, i, L, k):\n",
      "    \"\"\" ignore the children of an interior node beyond i generations.\n",
      "        Only positions 0..i-1 of a are examined: the last one that is\n",
      "        below k is incremented and (a, its depth) is returned; exhausted\n",
      "        positions passed on the way are reset to 0. Returns (a, 0) when\n",
      "        none can be incremented. Positions i and beyond are left as is.\n",
      "        a is modified in place. \"\"\"\n",
      "    for j in reversed(range(i)):\n",
      "        if (a[j] < k):\n",
      "            a[j] += 1\n",
      "            return (a, j+1)\n",
      "        a[j] = 0\n",
      "    return (a, 0)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "