Need Your Help for small correction - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: General Coding Help (https://python-forum.io/forum-8.html) +--- Thread: Need Your Help for small correction (/thread-2983.html) |
Need Your Help for small correction - desul - Apr-22-2017 Hi, can you tell me why the pattern (NN NN NN NN) is not matched by the program below, and why only (JJ) comes out on its own? What I actually need is the pattern (JJ* NN+). Please help me correct this code: class extract(object): def __init__(self,regex=''): self.w=[] self.pos=[] self.sw=[] self.spos=[] self.regex=regex self.r=re.compile(regex) self.stl=[] self.tgl=[] def map_con(self,begin,end): self.index=[] f=1 for i in range(len(self.d)): mi=self.d[i][0] ma=self.d[i][1] if begin>=mi and begin<ma and f: self.index.append(i) f=0 elif end>ma and not f: self.index.append(i) elif end<ma and not f: #self.index.append(i) f=1 elif not f: self.index.append(i) break return self.index def n_gram_extract(self,line): #w=nltk.word_tokenize(line.lower()) self.d={} self.tag_dic={} line=line.replace('-',' - ') w1=nltk.word_tokenize(line) tag=nltk.pos_tag(w1) self.spos=[j for i,j in tag] self.sw=[i.lower() for i in w1] self.w=' '.join(self.sw) self.pos=' '.join(self.spos) pos=0 if not self.regex: print 'No regex found for spliting' return self.sw for i,j in enumerate(self.spos): l=pos+len(j) self.d[i]=[pos,l] pos=l+1 my_iter=self.r.finditer(self.pos) self.stl=[] self.tgl=[] count = 0 for match in my_iter: count += 1 ran=match.span() #print ran,count posi= self.map_con(ran[0],ran[1]) st='' tg='' for num in posi: st+=self.sw[num]+' ' tg+=self.spos[num]+' ' st=st.strip() tg=tg.strip() self.stl.append(st) self.tgl.append(tg) self.tag_dic[st]=tg #print st,tg #print count return self.tag_dic#self.stl#,tgl,self.tag_dic if __name__=='__main__': s="""Fast Breeder Test Reactor is a 40 MWt/13.2 MWe sodium cooled, loop type, mixed carbide-fuelled reactor. Its main aim is to gain experience in the design, construction and operation of fast reactors including sodium systems and to serve as an irradiation facility for development of fuel and structural materials for future fast reactors. 
""" o=extract('(JJ NN)|(JJJJNN)|(JJ NN NN)|(JJ NN NN NN)|NN|(NN NN)|(NN NN NN)|(NN NN NN NN)|(NN JJ)|(NN NN JJ)|(NN JJ NN NN)') ne=o.n_gram_extract(s) print(ne)
RE: Need Your Help for small correction - nilamo - Apr-30-2017 What is even going on here? That regex is just a bunch of groups of J's and N's. So it shouldn't match anything in your text, right? Is that the output you're currently getting, or what you want to be getting? What's off about it? |