Python Forum
error in database for chatbot - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: General Coding Help (https://python-forum.io/forum-8.html)
+--- Thread: error in database for chatbot (/thread-8927.html)



error in database for chatbot - TaraSunray - Mar-13-2018

i'm currently trying to follow a tutorial about coding a chatbot with python. i'm new to python and coding in general except for some projects i did in gamemaker(lol). i'm getting an error when trying to run my code and have difficulties figuring out what the problem is. any help would be greatly appreciated. the error i get is:

Error:
Traceback (most recent call last): File "C:\Python\Python36-32\TUTORIALS\chatbot\chatbot-database.py", line 104, in <module> row = json.loads(row) File "C:\Python\Python36-32\lib\json\__init__.py", line 354, in loads return _default_decoder.decode(s) File "C:\Python\Python36-32\lib\json\decoder.py", line 339, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "C:\Python\Python36-32\lib\json\decoder.py", line 355, in raw_decode obj, end = self.scan_once(s, idx) json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 374 (char 373)
the code i'm trying to run is:

import sqlite3
import json
from datetime import datetime

timeframe  = '2015-01'
sql_transaction = []

connection = sqlite3.connect('{}.db'.format(timeframe))
c = connection.cursor()

def create_table():
    c.execute("""CREATE TABLE IF NOT EXISTS parent_reply
              (parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE,
              parent TEXT, comment TEXT, subbreddit TEXT, unix INT, score INT)""")

def format_data(data):
    data = data.replace("\n", " newlinechar ").replace("\r", " returnchar ").replace('"', "'")
    return data

def find_existing_score(pid):
    try:

        sql = "SELECT score FROM parent_reply WHERE parent_id = '{}' LIMIT 1".format(pid)
        c.execute(sql)
        result = c.fetchone()
        if result != None:
            return result[0]
        else: return False
    except Exception as e:
        #print("find_parent", e)
        return False

def acceptable(data):
    if len(data.split(' ')) > 50 or len(data) < 1:
        return False
    elif len(data) > 1000:
        return False
    elif data == '[deleted]' or data == '[removed]':
        return False
    else:
        return True
    
    
def find_parent(pid):
    try:

        sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1".format(pid)
        c.execute(sql)
        result = c.fetchone()
        if result != None:
            return result[0]
        else: return False
    except Exception as e:
        #print("find_parent", e)
        return False

def transaction_bldr(sql):
    global sql_transaction
    sql_transaction.append(sql)
    if len(sql_transaction) > 1000:
        c.execute('BEGIN TRANSACTION')
        for s in sql_transaction:
            try:
                c.execute(s)
            except:
                pass
        connection.commit()
        sql_transaction = []
            
    

def sql_insert_replace_comment(commentid, parentid, parent, comment, subreddit, time, score):
    try:
        sql = """UPDATE parent_reply SET parent_id = ?, comment_id = ?, parent = ?, comment = ?, subreddit = ?, time = ?, score = ? WHERE parent_id = ?;""".format(parentid, commentid, parent, comment, subreddit, time, score)
        transaction_bldr(sql)
    except Exception as e:
        print('s-UPDATE insertion', str(e))

def sql_insert_has_parent(commentid, parentid, parent, comment, subreddit, time, score):
    try:
        sql = """INSERT INTO parent_reply (parent_id, comment_id, parent, comment, subreddit, time, score)""".format(parentid, commentid, parent, comment, subreddit, time, score)
        transaction_bldr(sql)
    except Exception as e:
        print('s-PARENT insertion', str(e))

def sql_insert_no_parent(commentid, parentid, comment, subreddit, time, score):
    try:
        sql = """INSERT INTO parent_reply (parent_id, comment_id, comment, subreddit, time, score)""".format(parentid, commentid, comment, subreddit, time, score)
        transaction_bldr(sql)
    except Exception as e:
        print('s-NO_PARENT insertion', str(e))


        
    
if __name__ == "__main__":
              create_table()
              row_counter = 0
              paired_rows = 0

              with open("E:/reddit_database/{}/RC_{}".format(timeframe.split('-')[0], timeframe), buffering=1000)as f:
                  for row in f:
                      row_counter +=1
                      row = json.loads(row)
                      parent_id = row['parent_id']
                      body = format_data(row['body'])
                      created_utc = row['created_utc']
                      score = row['score']
                      subreddit = row['subreddit']
                      comment_id = row['name']
                      parent_data = find_parent(parent_id)

                      if score >= 5:
                          if acceptable(body):
                              existing_comment_score = find_existing_score(parent_id)
                              if existing_comment_score:
                                  if score > existing_comment_score:
                                      sql_insert_replace_comment(comment_id, parent_id, parent_data, body, subreddit, created_utc, score)

                                

                              else:
                                  if parent_data:
                                      sql_insert_has_parent(comment_id, parent_id, parent_data, body, subreddit, created_utc, score)
                                      paired_rows += 1
                                  else:
                                      sql_insert_no_parent(comment_id, parent_id, body, subreddit, created_utc, score)



                      if row_counter % 100000 == 0:
                          print("Total rows read: {}, Pared rows: {}, Time {}".format(row_counter, paired_rows, str(datetime.now())))