Apr-28-2023, 09:33 AM
Hi Team,
I want to insert CSV records into a SQL table via Python.
My CSV file is 10 GB in size.
I found this code on Google — can you suggest a better way if someone has done this before?
"""Bulk-load a large (multi-GB) pipe-delimited CSV into a SQL Server table.

Reads the file in fixed-size chunks with pandas so the whole file is never
held in memory, and inserts each chunk with a single parameterized
executemany() call using pyodbc's fast_executemany bulk mode.
"""

import os
import time

import pandas as pd
import pyodbc

# Connection settings. NOTE(security): never hard-code credentials in source
# control -- read them from the environment; the literals are placeholders.
SQL_LOGIN = os.environ.get("SQL_LOGIN", "XXXX\\XXX-XXX-XXX")
SQL_PASSWORD = os.environ.get("SQL_PASSWORD", "xxxxxxx")
SERVER = "DESKTOP-GQK64O6"
DATABASE = "Customer"
TABLE_NAME = "employee"

FILE_PATH = "abc.csv"
CHUNK_SIZE = 1000  # rows per INSERT batch; raise (e.g. 10_000) for fewer round trips


def main() -> None:
    """Stream FILE_PATH into TABLE_NAME chunk by chunk, committing per chunk."""
    connection = pyodbc.connect(
        "DRIVER={ODBC Driver 17 for SQL Server};"
        f"SERVER={SERVER};DATABASE={DATABASE};"
        f"UID={SQL_LOGIN};PWD={SQL_PASSWORD};"
    )
    try:
        cursor = connection.cursor()
        # fast_executemany sends each batch as one bulk parameter array
        # instead of one network round trip per row -- the single biggest
        # speed-up pyodbc offers for large loads like this one.
        cursor.fast_executemany = True

        start_time = time.time()
        total_rows = 0
        sql = None  # built once, from the first chunk's header

        for chunk in pd.read_csv(FILE_PATH, chunksize=CHUNK_SIZE, sep="|"):
            if sql is None:
                # The column list is identical for every chunk, so build the
                # parameterized INSERT statement only once (loop-invariant).
                columns = ",".join(chunk.columns)
                placeholders = ",".join("?" * len(chunk.columns))
                sql = f"INSERT INTO {TABLE_NAME} ({columns}) VALUES ({placeholders})"

            # pandas represents missing fields as NaN; convert them to None
            # so the driver stores SQL NULL instead of failing on float NaN.
            values = chunk.astype(object).where(pd.notnull(chunk), None).values.tolist()

            cursor.executemany(sql, values)
            connection.commit()
            # cursor.rowcount is unreliable after executemany (often -1), so
            # report an accurate running total instead.
            total_rows += len(values)
            print(f"{total_rows} rows inserted so far")

        elapsed_time = time.time() - start_time
        print(f"Elapsed time: {elapsed_time:.2f} seconds")
    finally:
        # Always release the connection, even if a chunk insert raises.
        connection.close()


if __name__ == "__main__":
    main()