Matching Regex in Python from Excelfile and inserting into database

Shuzo · May-14-2020, 06:53 PM

Hello. Im trying to store multiple excel files into my database but need to match the rows of each excel row by Timestamp, ID, Content before inserting it. Therefor im using REGEX

Each Timestamp ID and Content can vary from row to row and from excel file to excel file.
Im stucking with an Errorcode i get, I hope you can help me out here.

This is my code :

 import os
import re
import pymysql
pymysql.install_as_MySQLdb()
import pandas as pd
import sqlalchemy


def insert_or_update(engine, pd_table, table_name):
    inserts = 0
    updates = 0
    for i in range(len(pd_table)):
        vals_with_quotes = ["'" + str(x) + "'" for x in pd_table.loc[i, :].values]
        # print(vals_with_quotes)
        update_pairs = [str(c) + " = '" + str(v) + "'" for c, v in zip(pd_table.columns, pd_table.loc[i, :])]
        query = f"INSERT INTO {table_name} ({', '.join(list(pd_table.columns.values))}) " \
                f"VALUES ({', '.join(vals_with_quotes)}) " \
                f"ON DUPLICATE KEY UPDATE {', '.join(update_pairs)}"
        print(query)
        result = engine.execute(query)
        if result.lastrowid == 0:
            updates += 1
        else:
            inserts += 1
    print(f"Inserted {inserts} rows and updated {updates} rows.")


schema = '---'
alchemy_connect = "---"
engine = sqlalchemy.create_engine(alchemy_connect)  # connect to server
engine.execute(f"USE {schema}")  # select new db
# engine.execute("SET NAMES UTF8MB4;")

query = "SELECT * FROM --- where study=---"
pm = pd.read_sql(query, engine)

rootpath = "path/"

for root, dirs, files in os.walk(rootpath):
    for file in files:
        print(root, dirs, files, file)
        d = pd.read_excel(root + file, header=None)
        d.drop(columns=[0], inplace=True)
        d.rename(columns={1: "content"}, inplace=True)

        participants = []

        for ix, row in d.iterrows():
            c = row["content"]

            match = re.search(r"^\[(\d{1,2}:\d{1,2}:\d{1,2})]\s+(\w+):\s*(.*)", c)
            if match:
                timestamp = match.group(1)
                id_code = match.group(2)
                only_content = match.group(3)
                if re.search(r"(?:Versuch(?:sleit(?:er)?)?|ersuchsleiter)", id_code):
                    id_code = "Versuchsleiter"

            # get pid
            participant = pm.loc[pm["id_code"] == id_code, "pid"]
            try:
                pid = participant.values[0]
            except:
                pid = "Versuchsleiter"

            # print(ix, pid, id_code, only_content, timestamp)
            if pid and pid not in participants and pid != "Versuchsleiter":
                participants.append(pid)
            d.loc[ix, "pid"] = pid
            d.loc[ix, "timestamp"] = timestamp
            d.loc[ix, "content"] = only_content.strip()
            d.loc[ix, "is_participant"] = 0 if pid == "Versuchsleiter" else 1

        d = d[["pid", "is_participant", "content", "timestamp"]]
        d.loc[(d['pid'] == "Versuchsleiter"), "pid"] = participants[0]
        d.loc[(d['pid'] == None), "pid"] = participants[0]

        # with pd.option_context('display.max_rows', None, 'display.max_columns', None):
            # print(d)
        insert_or_update(engine, d, "YM_Transcripts")

And this is the Errorcode i get :

 
File "C:/Users/.../PycharmProjects/.../.../---.py", line 20, in insert_or_update
    result = engine.execute(query)

  File "C:\Users\...\PycharmProjects\...\...\lib\site-packages\sqlalchemy\engine\base.py", line 2191, in execute
    return connection.execute(statement, *multiparams, **params)

TypeError: not enough arguments for format string

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	Regex replace in SQLite3 database	WJSwan	1	922	Dec-04-2023, 05:55 PM Last Post: Larz60+
	problem in matching in regex	akbarza	1	492	Nov-21-2023, 09:31 AM Last Post: snippsat
	python script for inserting rows into hbase table	lravikumarvsp	7	7,386	Mar-24-2023, 04:44 AM Last Post: parth_botadara
	Basic SQL query using Py: Inserting or querying sqlite3 database not returning data	marlonbown	3	1,620	Nov-08-2022, 07:16 PM Last Post: marlonbown
	Python Regex	quest	2	2,615	Sep-22-2022, 03:15 AM Last Post: quest
	python regex: get rid of double dot	wardancer84	4	2,499	Sep-09-2021, 03:03 PM Last Post: wardancer84
	Using Regex Expression With Isin in Python	eddywinch82	0	2,382	Apr-04-2021, 06:25 PM Last Post: eddywinch82
	Exception handling in regex using python	ShruthiLS	1	2,447	May-04-2020, 08:12 AM Last Post: anbu23
	Python the regex not getting any attributes	sarath_unrelax	1	1,934	Dec-19-2019, 11:06 AM Last Post: Larz60+
	Python regex to get only numbers	tantony	6	4,328	Oct-09-2019, 11:53 PM Last Post: newbieAuggie2019

Matching Regex in Python from Excelfile and inserting into database

User Panel Messages

Announcements