''' Endonuclease helpers: REBASE pattern conversion, sequence cutting and
the REXENZ restriction-enzyme class. '''

import re

# IUPAC nucleotide ambiguity codes mapped to the regex character class of the
# bases they stand for.  Any character not listed here is copied unchanged.
_IUPAC_CLASSES = {
    'R': '[GA]',    # R = G or A
    'Y': '[CT]',    # Y = C or T
    'M': '[AC]',    # M = A or C
    'K': '[GT]',    # K = G or T
    'S': '[GC]',    # S = G or C
    'W': '[AT]',    # W = A or T
    'B': '[CGT]',   # B = not A (C or G or T)
    'D': '[AGT]',   # D = not C (A or G or T)
    'H': '[ACT]',   # H = not G (A or C or T)
    'V': '[ACG]',   # V = not T (A or C or G)
    'N': '[ACGT]',  # N = A or C or G or T
}


def rebase2regex(pat):
    ''' rebase2regex(pat)
    Return the compiled, case-insensitive regular expression for pat,
    where pat is a restriction enzyme pattern.

    Every '^' (cleavage marker) is removed and each upper-case IUPAC
    ambiguity code is expanded to the matching character class.
    '''
    pattern = ''.join(
        _IUPAC_CLASSES.get(base, base) for base in pat if base != '^'
    )
    return re.compile(pattern, re.IGNORECASE)


def cut_dna(setlist, seq):
    ''' cut_dna(setlist,seq)
    Split seq at the positions in setlist and return the list of fragments.

    setlist is expected in increasing order.  A leading cut at position 0 is
    ignored because nothing lies before it.  When no effective cut remains
    (empty setlist, or only a cut at 0) the uncut sequence is returned as
    the single fragment, so joining the fragments always gives back seq.
    '''
    cuts = list(setlist)
    if cuts and cuts[0] == 0:  # nothing before
        cuts = cuts[1:]
    if not cuts:
        return [seq]
    fragments = []
    start = 0
    for pos in cuts:
        fragments.append(seq[start:pos])
        start = pos
    fragments.append(seq[start:])
    return fragments


##############################
class REXENZ:
    ''' A restriction enzyme: a name plus a recognition pattern in which an
    optional '^' marks the cleavage position. '''

    def __init__(self, name='', pat=''):
        ''' __init__(self, name='',pat='')
        Store the name and set up the pattern through setpat().
        '''
        self.name = name
        self.__rex = None
        self.setpat(pat)

    def setname(self, name):
        ''' setname(self,name)
        Replace the enzyme name.
        '''
        self.name = name

    def setpat(self, pat):
        ''' setpat(self,pat)
        Replace the recognition pattern.

        clivepos becomes the index of the first '^' in pat, or 0 when pat has
        no '^' (cleavage position unknown).  The compiled regex is refreshed
        on every call, so clearing the pattern also clears the old regex.
        '''
        self.pat = pat
        # str.find returns -1 when there is no '^'; clamp that to 0 (unknown).
        self.clivepos = max(pat.find('^'), 0) if pat else 0
        rex = rebase2regex(pat) if pat else None
        # A pattern made only of '^' compiles to an empty regex that would
        # match at every position; treat it like a missing pattern.
        self.__rex = rex if rex is not None and rex.pattern else None

    def digeststart(self, seq):
        ''' digeststart(self,seq)
        Return the start positions of the successive non-overlapping
        matches of the recognition pattern in seq.  Returns an empty list
        when no pattern is set.
        '''
        if self.__rex is None:
            return []
        return [match.start() for match in self.__rex.finditer(seq)]

    def digest(self, seq):
        ''' digest(self,seq)
        Return the cut positions in seq: each match start shifted by
        clivepos, keeping only the positions that fall before the end of seq.
        '''
        limit = len(seq)
        offset = self.clivepos
        return [
            start + offset
            for start in self.digeststart(seq)
            if start + offset < limit
        ]

    def doubledigest(self, enz, seq):
        ''' doubledigest(self,enz,seq)
        Return the sorted cut positions obtained with both this enzyme and
        enz on seq; a position produced by both appears once.
        '''
        return sorted(set(self.digest(seq)) | set(enz.digest(seq)))

    def digested_dna(self, seq):
        ''' digested_dna(self,seq)
        Return the fragments of seq after digestion by this enzyme.
        '''
        return cut_dna(self.digest(seq), seq)

    def doubledigested_dna(self, enz, seq):
        ''' doubledigested_dna(self,enz,seq)
        Return the fragments of seq after digestion by this enzyme and enz.
        '''
        return cut_dna(self.doubledigest(enz, seq), seq)