i'm currently trying to follow a tutorial about coding a chatbot with python. i'm new to python and coding in general except for some projects i did in gamemaker(lol). i'm getting an error when trying to run my code and have difficulties figuring out what the problem is. any help would be greatly appreciated. the error i get is:
Error:Traceback (most recent call last):
File "C:\Python\Python36-32\TUTORIALS\chatbot\chatbot-database.py", line 104, in <module>
row = json.loads(row)
File "C:\Python\Python36-32\lib\json\__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "C:\Python\Python36-32\lib\json\decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python\Python36-32\lib\json\decoder.py", line 355, in raw_decode
obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 374 (char 373)
the code i'm trying to run is:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import sqlite3 import json from datetime import datetime timeframe = '2015-01' sql_transaction = [] connection = sqlite3.connect( '{}.db' . format (timeframe)) c = connection.cursor() def create_table(): c.execute( """CREATE TABLE IF NOT EXISTS parent_reply (parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT, comment TEXT, subbreddit TEXT, unix INT, score INT)""" ) def format_data(data): data = data.replace( "\n" , " newlinechar " ).replace( "\r" , " returnchar " ).replace( '"' , "'") return data def find_existing_score(pid): try : sql = "SELECT score FROM parent_reply WHERE parent_id = '{}' LIMIT 1" . format (pid) c.execute(sql) result = c.fetchone() if result ! = None : return result[ 0 ] else : return False except Exception as e: #print("find_parent", e) return False def acceptable(data): if len (data.split( ' ' )) > 50 or len (data) < 1 : return False elif len (data) > 1000 : return False elif data = = '[deleted]' or data = = '[removed]' : return False else : return True def find_parent(pid): try : sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1" . format (pid) c.execute(sql) result = c.fetchone() if result ! = None : return result[ 0 ] else : return False except Exception as e: #print("find_parent", e) return False def transaction_bldr(sql): global sql_transaction sql_transaction.append(sql) if len (sql_transaction) > 1000 : c.execute( 'BEGIN TRANSACTION' ) for s in sql_transaction: try : c.execute(s) except : pass connection.commit() sql_transaction = [] def sql_insert_replace_comment(commentid, parentid, parent, comment, subreddit, time, score): try : sql = """UPDATE parent_reply SET parent_id = ?, comment_id = ?, parent = ?, comment = ?, subreddit = ?, time = ?, score = ? WHERE parent_id = ?;""" . format (parentid, commentid, parent, comment, subreddit, time, score) transaction_bldr(sql) except Exception as e: print ( 's-UPDATE insertion' , str (e)) def sql_insert_has_parent(commentid, parentid, parent, comment, subreddit, time, score): try : sql = """INSERT INTO parent_reply (parent_id, comment_id, parent, comment, subreddit, time, score)""" . format (parentid, commentid, parent, comment, subreddit, time, score) transaction_bldr(sql) except Exception as e: print ( 's-PARENT insertion' , str (e)) def sql_insert_no_parent(commentid, parentid, comment, subreddit, time, score): try : sql = """INSERT INTO parent_reply (parent_id, comment_id, comment, subreddit, time, score)""" . format (parentid, commentid, comment, subreddit, time, score) transaction_bldr(sql) except Exception as e: print ( 's-NO_PARENT insertion' , str (e)) if __name__ = = "__main__" : create_table() row_counter = 0 paired_rows = 0 with open ( "E:/reddit_database/{}/RC_{}" . format (timeframe.split( '-' )[ 0 ], timeframe), buffering = 1000 )as f: for row in f: row_counter + = 1 row = json.loads(row) parent_id = row[ 'parent_id' ] body = format_data(row[ 'body' ]) created_utc = row[ 'created_utc' ] score = row[ 'score' ] subreddit = row[ 'subreddit' ] comment_id = row[ 'name' ] parent_data = find_parent(parent_id) if score > = 5 : if acceptable(body): existing_comment_score = find_existing_score(parent_id) if existing_comment_score: if score > existing_comment_score: sql_insert_replace_comment(comment_id, parent_id, parent_data, body, subreddit, created_utc, score) else : if parent_data: sql_insert_has_parent(comment_id, parent_id, parent_data, body, subreddit, created_utc, score) paired_rows + = 1 else : sql_insert_no_parent(comment_id, parent_id, body, subreddit, created_utc, score) if row_counter % 100000 = = 0 : print ( "Total rows read: {}, Pared rows: {}, Time {}" . format (row_counter, paired_rows, str (datetime.now()))) |