Sep-01-2017, 02:05 PM
I have two files from which I need to extract IP and port information or, more generally, find which entries are the same in both and which differ, using mrjob. The following code is a draft of what I'm trying to do.
import csv

from mrjob.job import MRJob

# Column names assigned, in order, to the fields of each CSV input line.
cols = 'ipv,vvv.vvv.vvv'.split(',')


def parse_line(line_str):
    """Parse one CSV-formatted line into a dict keyed by the names in ``cols``.

    Every field is stripped of surrounding whitespace. ``zip`` stops at the
    shorter of ``cols`` and the parsed row, so surplus fields are dropped and
    a short (or blank) line produces a dict with fewer keys.
    """
    # The built-in next() works on both Python 2.6+ and Python 3, whereas the
    # reader's .next() method exists only on Python 2.
    row = next(csv.reader([line_str]))
    return dict(zip(cols, (field.strip() for field in row)))


def parse_ip_address(ip_str):
    """Return the IP address field unchanged (placeholder for real parsing)."""
    return ip_str


def parse_port(p_str):
    """Return the port field unchanged (placeholder for real parsing)."""
    return p_str


class Myclass(MRJob):
    """mrjob job that groups the IP addresses found in the input by port."""

    def mapper(self, _, line):
        """Emit one ``(port, ip_address)`` pair for each well-formed line.

        The port is read from the ``'ipv'`` column and the IP address from
        the ``'vvv.vvv.vvv'`` column, as named in ``cols``.
        """
        # Parse the line from its current CSV form.
        p1 = parse_line(line)

        # Blank or truncated lines lack one of the expected columns; skip
        # them instead of aborting the whole job with a KeyError.
        if len(p1) < len(cols):
            return

        ip_address = parse_ip_address(p1['vvv.vvv.vvv'])
        port = parse_port(p1['ipv'])

        # Yield a key and a value.
        yield port, ip_address

    def reducer(self, key, values):
        """Emit each port together with the list of IP addresses seen for it.

        mrjob requires reducers to yield ``(key, value)`` pairs; ``values`` is
        a one-shot iterator, so it is materialised into a list for output.
        """
        yield key, list(values)


if __name__ == '__main__':
    Myclass.run()