Dot Plot in Python
We want to write a python program that create a
- dotoplot. The program should take from command
line:
- 1. a fasta file with two sequences,
- 2. a scoring matrix
- 3. a word size
- 4. a display threshold
Dot Plot in Python We want to write a python program that create a - - PowerPoint PPT Presentation
Dot Plot in Python We want to write a python program that create a dotoplot. The program should take from command line: 1. a fasta file with two sequences, 2. a scoring matrix 3. a word size 4. a display threshold Text example Id matrix,
def computeDP(s1,s2,sm,ws,th): ''' s1,s2= sequences, sm= score matrix (as dict), ws=word size, th= cutoff threshold ''' dp=set() for i in range(len(s1)-ws+1): for j in range(len(s2)-ws+1): sc=0.0 for k in range(ws): sc=sc+sm.get((s1[i+k],s2[j+k]), 0.0) if scoreij >= th: dp.add((i,j)) return dp
def printDP(s1,s2,dp,shift=0): ''' s1,s2=sequences, dp=dotplot, shif=shifting print positions''' print "\\", for j in range(len(s2)): print s2[j], print for i in range(len(s1)): print s1[i], for j in range(len(s2)): pc=" " if (i-shift,j-shift) in dp : pc="*" print pc, print
def computeDP(s1,s2,sm,ws,th): ''' s1,s2= sequences, sm= score matrix (as dict), ws=word size, th= cutoff threshold ''' dp=set() for i in range(len(s1)-ws+1): for j in range(len(s2)-ws+1): sc=0.0 for k in range(ws): sc=sc+sm.get((s1[i+k],s2[j+k]), 0.0) if scoreij >= th: dp.add((i,j)) return dp
>>> s1="TTTAAATTT" >>> s2="SAAASSAAT" >>> al,sm=readScoreMatrix('id.mat') >>> getSimpos(s1,al,sm,ws=1,th=0) {'A': [3, 4, 5], 'T': [0, 1, 2, 6, 7, 8]} >>> getSimpos(s1,al,sm,ws=3,th=3) {'AAA': [3], 'TTT': [0, 6], 'AAT': [4], 'ATT': [5], 'TAA': [2], 'TTA': [1]} >>>
>>> s1="TTTAAATTT" >>> s2="SAAASSAAT" >>> ws,th = 3,2 >>> simpos=getSimpos(s1,al,sm,ws,th) >>> simpos {'AAA': [3], 'TTT': [0, 6], 'AAT': [4], 'ATT': [5], 'TAA': [2], 'TTA': [1]} >>> for j in range(len(s2)-ws+1): ... word=s2[j:j+ws] ... for i in simpos.get(word,[]): ... print i,j,s1[i:i+ws],s2[j:j+ws] ... 3 1 AAA AAA 4 6 AAT AAT >>>
A R N D C Q E G H I L K M F P S T W Y V A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4
>>> import itertools >>> alphabet='AB' >>> ws=3 >>> for e in itertools.product(alphabet,repeat=ws): ... print "".join(e), e ... AAA ('A', 'A', 'A') AAB ('A', 'A', 'B') ABA ('A', 'B', 'A') ABB ('A', 'B', 'B') BAA ('B', 'A', 'A') BAB ('B', 'A', 'B') BBA ('B', 'B', 'A') BBB ('B', 'B', 'B')
def findSumWords(word,alphabet,sm,th): ''' findSumWords(word,alphabet,sm,th) ''' Similar=[] # list of similar words for w in itertools.product(alphabet,repeat=len(word)): cscore=0.0 for i in range(len(word)): cscore+=sm[word[i],w[i]] if cscore >= th: similar.append("".join(w)) return similar
def getSimpos(query,alphabet,sm,ws,th): simpos={} # stores positions of a given matching word simwords={} # for each used word, store similar words for i in range(len(query)-ws+1): word=query[i:i+ws] # create word # test if we searched for similar words if not simwords.has_key(word): simwords[word]=findSumWords(word,alphabet,sm,th) for k in simwords[word]: simpos[k]=simpos.get(k,[]) simpos[k].append(i) return simpos
def lcomputeDP(s1,s2,alphabet,sm,ws,th): ''' s1,s2=sequences, sm=score matrix ws=word size, th=threshold ''' dp=set() # initialize dotplot # process first sequence simpos=getSimpos(s1,alphabet,sm,ws,th) for j in range(len(s2)-ws+1): # loop 1 word=s2[j:j+ws] for i in simpos.get(word,[]): # loop 2 dp.add((i+hw,j+hw)) return dp
def getSimpos(query,alphabet,sm,ws,th): simpos={} # stores positions of a given matching word simwords={} # for each used word, store similar words for i in range(len(query)-ws+1): word=query[i:i+ws] # create word # test if we searched for similar words if not simwords.has_key(word): simwords[word]=findSumWords(word,alphabet,sm,th) for k in simwords[word]: simpos[k]=simpos.get(k,[]) simpos[k].append(i) return simpos
for j in range(len(s2)-ws+1): # loop 1 word=s2[j:j+ws] for i in simpos.get(word,[]): # loop 2 dp.add((i+hw,j+hw))
def lcomputeDP(s1,s2,alphabet,sm,ws,th): ''' s1,s2=sequences, sm=score matrix ws=word size, th=threshold ''' dp=set() # initialize dotplot # process first sequence simpos=getSimpos(s1,alphabet,sm,ws,th) for j in range(len(s2)-ws+1): # loop 1 word=s2[j:j+ws] for i in simpos.get(word,[]): # loop 2 dp.add((i+hw,j+hw)) return dp