Jan-06-2017, 06:15 AM
Excuse me.
I am new to the Python programming language.
I am trying to load data from a CSV file into Hadoop HBase with a Python script,
but every time I run the script, it fails with an error like "NO PROTOCOL HEADER VERSION".
Does anybody know what I can do to solve that?
This is my script:
I am new to the Python programming language.
I am trying to load data from a CSV file into Hadoop HBase with a Python script,
but every time I run the script, it fails with an error like "NO PROTOCOL HEADER VERSION".
Does anybody know what I can do to solve that?
This is my script:
"""Load rows from a CSV file into an HBase table through the Thrift gateway.

Each CSV row becomes one HBase row: the first field is the row key and the
next 16 fields are stored under the ``xls`` column family (see ``COLUMNS``).
"""
import xlrd
import csv
import happybase
import time
import errno
import sys

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
batch_size = 1000
# Host name of the HBase Thrift server. It must not contain stray whitespace
# (the original value was "myhost " with a trailing space).
host = "myhost"
# The three settings below must match how the HBase Thrift server is running.
# A Thrift "No protocol version header" error typically means the client and
# the server disagree on transport/protocol, or that `port` is not the Thrift
# port at all. The values here are the happybase defaults; switch to
# transport="framed" and/or protocol="compact" if the server was started with
# those options.
# NOTE(review): the `protocol` argument needs a happybase release that
# supports it (0.9 or later, as far as I know) - confirm the installed version.
port = 9090
transport = "buffered"
protocol = "binary"
file_path = "2005.csv"
table_name = "testing_xls"

# Column names, in the order the values appear in a CSV row after the row key.
COLUMNS = (
    "xls:tahun",
    "xls:gelombang",
    "xls:nama",
    "xls:jumlah_pilihan",
    "xls:pilihan1",
    "xls:pilihan2",
    "xls:kode_sekolah",
    "xls:nama_Sekolah",
    "xls:kota_sekolah",
    "xls:alamat_sekolah",
    "xls:nim",
    "xls:nilai1",
    "xls:nilai2",
    "xls:status",
    "xls:terima_fakultas",
    "xls:keterangan",
)


def connect_to_hbase():
    """Open a connection to the HBase Thrift server.

    Returns:
        A ``(conn, batch)`` tuple: the open happybase connection and a batch
        on ``table_name`` created with ``batch_size``.
    """
    conn = happybase.Connection(
        host=host,
        port=port,
        table_prefix_separator=":",
        autoconnect=False,
        transport=transport,
        protocol=protocol,
    )
    conn.open()
    print(conn.tables())
    table = conn.table(table_name)
    batch = table.batch(batch_size=batch_size)
    return conn, batch


def insert_row(batch, row):
    """Queue one CSV row on ``batch``.

    Args:
        batch: object with a ``put(row_key, data)`` method.
        row: sequence of fields; ``row[0]`` is the row key and the following
            ``len(COLUMNS)`` fields are the column values.

    Raises:
        IndexError: if ``row`` has fewer fields than the table layout needs.
    """
    expected = len(COLUMNS) + 1
    if len(row) < expected:
        raise IndexError(
            "expected at least %d CSV fields, got %d" % (expected, len(row))
        )
    batch.put(row[0], dict(zip(COLUMNS, row[1:expected])))


def read_csv():
    """Open ``file_path`` and wrap it in a CSV reader.

    Returns:
        A ``(csvreader, csvfile)`` tuple; the caller must close ``csvfile``.
    """
    csvfile = open(file_path, "r")
    csvreader = csv.reader(csvfile)
    return csvreader, csvfile


def main():
    """Copy every non-empty row of the CSV file into HBase and report timing."""
    start_time = time.time()
    row_count = 0

    conn, batch = connect_to_hbase()
    try:
        print("Connect to Hbase. Table name : %s, bacthsize %i" % (table_name, batch_size))
        csvreader, csvfile = read_csv()
        try:
            print("Connected to file. name : %s" % (file_path))
            for row in csvreader:
                if not row:
                    # csv.reader yields [] for blank lines; nothing to store.
                    continue
                insert_row(batch, row)
                row_count += 1
            # Send once after the loop so that rows still queued in the
            # batch are written as well.
            batch.send()
        finally:
            csvfile.close()
    except IOError as e:
        # Only a broken pipe is tolerated; any other I/O failure must surface
        # instead of being silently dropped.
        if e.errno != errno.EPIPE:
            raise
        print("error broken pipe catched")
    finally:
        # Close the connection on every path, including a failed file open.
        conn.close()

    duration = time.time() - start_time
    print("Done. row count : %i, duration %.3f s" % (row_count, duration))


if __name__ == "__main__":
    main()

# Closing note from the post: "thank you very much, please help if you can help me.."