i'm currently trying to follow a tutorial about coding a chatbot with python. i'm new to python and coding in general except for some projects i did in gamemaker(lol). i'm getting an error when trying to run my code and have difficulties figuring out what the problem is. any help would be greatly appreciated. the error i get is:
Error:Traceback (most recent call last):
File "C:\Python\Python36-32\TUTORIALS\chatbot\chatbot-database.py", line 104, in <module>
row = json.loads(row)
File "C:\Python\Python36-32\lib\json\__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "C:\Python\Python36-32\lib\json\decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python\Python36-32\lib\json\decoder.py", line 355, in raw_decode
obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 374 (char 373)
the code i'm trying to run is:import sqlite3 import json from datetime import datetime timeframe = '2015-01' sql_transaction = [] connection = sqlite3.connect('{}.db'.format(timeframe)) c = connection.cursor() def create_table(): c.execute("""CREATE TABLE IF NOT EXISTS parent_reply (parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT, comment TEXT, subbreddit TEXT, unix INT, score INT)""") def format_data(data): data = data.replace("\n", " newlinechar ").replace("\r", " returnchar ").replace('"', "'") return data def find_existing_score(pid): try: sql = "SELECT score FROM parent_reply WHERE parent_id = '{}' LIMIT 1".format(pid) c.execute(sql) result = c.fetchone() if result != None: return result[0] else: return False except Exception as e: #print("find_parent", e) return False def acceptable(data): if len(data.split(' ')) > 50 or len(data) < 1: return False elif len(data) > 1000: return False elif data == '[deleted]' or data == '[removed]': return False else: return True def find_parent(pid): try: sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1".format(pid) c.execute(sql) result = c.fetchone() if result != None: return result[0] else: return False except Exception as e: #print("find_parent", e) return False def transaction_bldr(sql): global sql_transaction sql_transaction.append(sql) if len(sql_transaction) > 1000: c.execute('BEGIN TRANSACTION') for s in sql_transaction: try: c.execute(s) except: pass connection.commit() sql_transaction = [] def sql_insert_replace_comment(commentid, parentid, parent, comment, subreddit, time, score): try: sql = """UPDATE parent_reply SET parent_id = ?, comment_id = ?, parent = ?, comment = ?, subreddit = ?, time = ?, score = ? WHERE parent_id = ?;""".format(parentid, commentid, parent, comment, subreddit, time, score) transaction_bldr(sql) except Exception as e: print('s-UPDATE insertion', str(e)) def sql_insert_has_parent(commentid, parentid, parent, comment, subreddit, time, score): try: sql = """INSERT INTO parent_reply (parent_id, comment_id, parent, comment, subreddit, time, score)""".format(parentid, commentid, parent, comment, subreddit, time, score) transaction_bldr(sql) except Exception as e: print('s-PARENT insertion', str(e)) def sql_insert_no_parent(commentid, parentid, comment, subreddit, time, score): try: sql = """INSERT INTO parent_reply (parent_id, comment_id, comment, subreddit, time, score)""".format(parentid, commentid, comment, subreddit, time, score) transaction_bldr(sql) except Exception as e: print('s-NO_PARENT insertion', str(e)) if __name__ == "__main__": create_table() row_counter = 0 paired_rows = 0 with open("E:/reddit_database/{}/RC_{}".format(timeframe.split('-')[0], timeframe), buffering=1000)as f: for row in f: row_counter +=1 row = json.loads(row) parent_id = row['parent_id'] body = format_data(row['body']) created_utc = row['created_utc'] score = row['score'] subreddit = row['subreddit'] comment_id = row['name'] parent_data = find_parent(parent_id) if score >= 5: if acceptable(body): existing_comment_score = find_existing_score(parent_id) if existing_comment_score: if score > existing_comment_score: sql_insert_replace_comment(comment_id, parent_id, parent_data, body, subreddit, created_utc, score) else: if parent_data: sql_insert_has_parent(comment_id, parent_id, parent_data, body, subreddit, created_utc, score) paired_rows += 1 else: sql_insert_no_parent(comment_id, parent_id, body, subreddit, created_utc, score) if row_counter % 100000 == 0: print("Total rows read: {}, Pared rows: {}, Time {}".format(row_counter, paired_rows, str(datetime.now())))