# Lookup tables tying each nucleotide letter to its base-4 digit value.
# m2iD maps letter -> digit; i2mD is the inverse, digit -> letter.
m2iD = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
i2mD = dict(enumerate('ACGT'))

def motif2int(motif):
    '''Encode a sub-sequence/motif as a non-negative integer.

    The motif is read as a base-4 number, most significant letter first,
    with each letter's digit value looked up in m2iD.  A letter that is
    not a key of m2iD raises KeyError.
    '''
    value = 0
    for base in motif:
        # Horner's rule: shift the running value one base-4 place to the
        # left, then add the digit for the current letter.
        value = value * 4 + m2iD[base]
    return value

def baseN(n,b):
    '''Convert non-negative integer n to its representation in base b.

    n -- non-negative integer to convert
    b -- target base, from 2 to 36 inclusive

    Returns a string of digits, most significant first, using 0-9 followed
    by lower-case a-z.  Zero is returned as '0'.

    Raises ValueError if n is negative or b is outside 2-36.
    '''
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    if not 2 <= b <= len(digits):
        raise ValueError('base must be between 2 and 36, got %r' % (b,))
    if n < 0:
        raise ValueError('n must be non-negative, got %r' % (n,))
    if n == 0:
        return '0'

    # Peel digits off least significant first.  A loop rather than
    # recursion keeps very large n clear of the interpreter's recursion
    # limit.
    out = []
    while n:
        n, remainder = divmod(n, b)
        out.append(digits[remainder])
    out.reverse()
    return ''.join(out)

def int2motif(n, motifLen):
    '''Decode non-negative integer n into a sub-sequence/motif.

    n is written in base 4 via baseN, left-padded with zeros up to
    motifLen digits, and each digit is translated to its letter through
    i2mD.  If n needs more than motifLen digits, no padding is added and
    the result is correspondingly longer.
    '''
    digits = baseN(n, 4)
    # Digit 0 stands for the first letter, so zero padding on the left
    # fills the motif out to the requested length.
    padded = digits.rjust(motifLen, '0')
    return ''.join([i2mD[int(digit)] for digit in padded])

if __name__ == '__main__':
    # Script entry point: count how often every possible motif of a given
    # length occurs in the sequences of a FASTA file and print one
    # "<motif> <count>" line per motif.
    import sys
    from Bio import SeqIO

    # read in the fasta file name and motif length
    # from command line parameters
    if len(sys.argv) < 3:
        sys.exit('usage: %s <fasta file> <motif length>' % sys.argv[0])
    fastafile = sys.argv[1]
    try:
        motifLen = int(sys.argv[2])
    except ValueError:
        sys.exit('motif length must be an integer, got %r' % sys.argv[2])
    if motifLen < 1:
        sys.exit('motif length must be at least 1, got %d' % motifLen)

    # list to store subsequence frequency; position k holds the count of
    # the motif whose motif2int() value is k
    frequencyL = [0] * 4**motifLen

    # go over each DNA sequence in the fasta file
    # and count the frequency of subsequences
    # (the with-block makes sure the file is closed again)
    with open(fastafile) as handle:
        for rec in SeqIO.parse(handle, 'fasta'):
            # str(seq) gives the sequence text; Seq.tostring() is not
            # available on current Biopython releases.  Upper-casing lets
            # lower-case bases be counted too.
            chrom = str(rec.seq).upper()
            for i in range(len(chrom) - motifLen + 1):
                try:
                    index = motif2int(chrom[i:i + motifLen])
                except KeyError:
                    # window holds a symbol other than A/C/G/T (e.g. N);
                    # it has no slot in frequencyL, so leave it uncounted
                    continue
                frequencyL[index] += 1

    # print frequency result to screen
    # (single-argument print() behaves the same on Python 2 and 3)
    for i, frequency in enumerate(frequencyL):
        print('%s %d' % (int2motif(i, motifLen), frequency))

