![]() |
Jupyter error - 'The kernel appears to have died, it will restart automatically' - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: General Coding Help (https://python-forum.io/forum-8.html) +--- Thread: Jupyter error - 'The kernel appears to have died, it will restart automatically' (/thread-10898.html) |
# ---------------------------------------------------------------------------
# Thread: "Jupyter error - 'The kernel appears to have died, it will restart
# automatically'" (python-forum.io /thread-10898.html)
#
# Question (meganhollie, Jun-12-2018): the notebook kernel dies as soon as
# btm.run() executes.  The script (an edited biterm-topic-model from GitHub)
# ran fine the week before; Jupyter was updated, the machine rebooted, and the
# input files checked, with no change.
#
# NOTE(review): a kernel that dies at btm.run() with no Python traceback is
# usually the OS killing the process -- most often out of memory.
# init_model() materialises every biterm of every document in Python
# lists/dicts, which can be enormous for a large corpus.  Run the script from
# a plain terminal first to see the real traceback / OOM kill.
# ---------------------------------------------------------------------------

from collections import defaultdict
import operator
import os
import random
import time


def build_filtered_text(input_path="nippur input.txt",
                        stopword_path="nippurstopwords.txt",
                        output_path="filteredtext.txt"):
    """Copy every word of *input_path* that is not a stop word to *output_path*.

    BUG FIX: the original called
    nltk.corpus.stopwords.words("nippurstopwords.txt"), but stopwords.words()
    expects a language fileid bundled with the NLTK data (e.g. "english") and
    raises for an unknown name.  The local stop-word file is read directly
    instead, which is what the author evidently intended.  (The original also
    tokenised the input with nltk.word_tokenize into a variable that was never
    used; that dead code is dropped, so NLTK is no longer needed at all.)
    """
    with open(stopword_path) as f:
        stop_words = set(f.read().split())
    # The original re-opened the output in append mode once per kept word and
    # never closed the input; open each file exactly once and let the context
    # managers close them.
    with open(input_path) as src, open(output_path, "w") as dst:
        for word in src.read().split():
            if word not in stop_words:
                dst.write(" " + word)


class BTM(object):
    """Biterm Topic Model (BTM) trained with collapsed Gibbs sampling.

    A *biterm* is an unordered pair of distinct word ids that co-occur in the
    same document.  A biterm (w1, w2) with w1 < w2 is packed into a single
    integer as ``w1 * ENCODE + w2`` -- this assumes the vocabulary holds fewer
    than ENCODE words.
    """

    ENCODE = 1000000  # biterm packing base; vocab_size must stay below this

    def __init__(self, data_path, alpha, beta, num_iter, num_topic, output_dir):
        """Store hyper-parameters and initialise empty model state.

        data_path  -- corpus file: one document per line, whitespace tokens
        alpha      -- Dirichlet prior on the topic distribution
        beta       -- Dirichlet prior on the per-topic word distribution
        num_iter   -- number of Gibbs sweeps
        num_topic  -- number of topics K
        output_dir -- directory that receives the model-final-* files
        """
        self.data_path = data_path
        self.alpha = alpha
        self.beta = beta
        self.num_iter = num_iter
        self.num_topic = num_topic
        self.output_dir = output_dir

        self.word2Id = {}            # token -> integer id
        self.Id2Word = {}            # integer id -> token
        self.vocab_size = 0
        self.wordId_corpus = []      # documents as lists of word ids
        self.biterms_in_doc = []     # per-document {encoded biterm: count}
        self.num_doc_biterm = defaultdict(int)  # replaced by a list in load_data()
        self.biterms = []            # flat list of encoded biterms
        self.topic_biterm = []       # current topic assignment per biterm
        self.topic_word_num = []     # word id -> topic id -> count (dict after init)
        self.num_topic_biterm = []   # biterms currently assigned to each topic
        self.biterm_sum = {}         # cache for get_sum()

    def get_file_reader(self, path=None):
        """Open *path* (default: the corpus file) for reading."""
        return open(self.data_path if path is None else path, 'r')

    def get_file_writer(self, path, append=False):
        """Open *path* inside output_dir for writing (or appending)."""
        return open(os.path.join(self.output_dir, path), 'a' if append else 'w')

    def print_params(self):
        """Dump the hyper-parameters and the main counter structures."""
        params = ['alpha', 'beta', 'num_iter', 'num_topic',
                  'topic_word_num', 'num_topic_biterm', 'topic_biterm']
        for param in params:
            print(param, ':', getattr(self, param))
        print('-' * 40)

    def load_data(self):
        """Read the corpus and build the word<->id maps and the id corpus."""
        with self.get_file_reader() as f:
            for line in f:
                curr_doc = []
                for word in line.split():
                    if word not in self.word2Id:
                        new_id = len(self.word2Id)
                        self.word2Id[word] = new_id
                        self.Id2Word[new_id] = word
                    curr_doc.append(self.word2Id[word])
                self.wordId_corpus.append(curr_doc)
        self.num_doc_biterm = [0] * len(self.wordId_corpus)

    def init_model(self):
        """Extract all biterms and give every biterm a random initial topic."""
        for doc_number, doc in enumerate(self.wordId_corpus):
            doc_biterms = defaultdict(int)
            for word1 in doc:
                for word2 in doc:
                    if word1 < word2:
                        encoded = word1 * self.ENCODE + word2
                        doc_biterms[encoded] += 1
                        self.biterms.append(encoded)
                        self.num_doc_biterm[doc_number] += 1
            self.biterms_in_doc.append(doc_biterms)

        self.vocab_size = len(self.word2Id)
        self.topic_biterm = [0] * len(self.biterms)
        self.topic_word_num = {word_id: {t: 0 for t in range(self.num_topic)}
                               for word_id in range(self.vocab_size)}
        self.num_topic_biterm = [1] * self.num_topic  # 1-smoothed counters

        for biterm_index, biterm in enumerate(self.biterms):
            topic_id = random.randint(0, self.num_topic - 1)
            self.topic_word_num[biterm % self.ENCODE][topic_id] += 1
            self.topic_word_num[biterm // self.ENCODE][topic_id] += 1
            # BUG FIX: the original never incremented num_topic_biterm for the
            # random initial assignments, so build_model()'s per-biterm
            # decrements drove the counters negative; that can make every
            # sampling weight negative, leaving new_topic_id at -1, which
            # silently indexed the last topic and corrupted the counts.
            self.num_topic_biterm[topic_id] += 1
            self.topic_biterm[biterm_index] = topic_id

    def save_topic_words(self, topic_word_num=10):
        """Write the *topic_word_num* highest-scoring words of every topic.

        BUG FIX: the original sorted ascending and sliced the front of the
        list, i.e. it saved the *lowest*-scoring words for each topic; the
        top words require a descending sort.
        """
        with self.get_file_writer(path='model-final-topic-words.txt') as writer:
            for topic_id in range(self.num_topic):
                topic_line = {
                    word_id: (self.topic_word_num[word_id][topic_id]
                              / self.num_topic_biterm[topic_id] / 2)
                    for word_id in range(self.vocab_size)
                }
                ranked = sorted(topic_line.items(),
                                key=operator.itemgetter(1), reverse=True)
                writer.write("Topic:" + str(topic_id) + '\n')
                for word_id, score in ranked[:topic_word_num]:
                    writer.write("\t" + str(self.Id2Word[word_id])
                                 + "\t" + str(score) + '\n')

    def save_wordIds(self):
        """Persist the token -> id mapping, one 'token id' pair per line."""
        with self.get_file_writer(path='model-final-wordIds.txt') as writer:
            for word, word_id in self.word2Id.items():
                writer.write(str(word) + ' ' + str(word_id) + '\n')

    def get_sum(self, biterm):
        """Return (and cache) the normalising constant for *biterm*.

        NOTE(review): the cache is never invalidated after build_model()
        updates the counters -- preserved from the original; confirm against
        the upstream implementation before relying on save_theta() output.
        """
        if biterm not in self.biterm_sum:
            word1 = biterm // self.ENCODE
            word2 = biterm % self.ENCODE
            total = 0.0  # renamed: the original shadowed the builtin `sum`
            for topic_id in range(self.num_topic):
                total += ((self.num_topic_biterm[topic_id] + self.alpha)
                          * (self.topic_word_num[word1][topic_id] + self.beta)
                          * (self.topic_word_num[word2][topic_id] + self.beta)
                          / ((2 * self.num_topic_biterm[topic_id])
                             + (self.vocab_size * self.beta)) ** 2)
            self.biterm_sum[biterm] = total
        return self.biterm_sum[biterm]

    def save_theta(self):
        """Write the per-document topic distribution, one document per line."""
        with self.get_file_writer(path='model-final-theta.txt') as writer:
            for doc_index, doc_biterms in enumerate(self.biterms_in_doc):
                for topic_id in range(self.num_topic):
                    one_sum = 0.0
                    for biterm, count in doc_biterms.items():
                        word1 = biterm // self.ENCODE
                        word2 = biterm % self.ENCODE
                        weight = ((self.num_topic_biterm[topic_id] + self.alpha)
                                  * (self.topic_word_num[word1][topic_id] + self.beta)
                                  * (self.topic_word_num[word2][topic_id] + self.beta)
                                  / ((2 * self.num_topic_biterm[topic_id])
                                     + (self.vocab_size * self.beta)) ** 2)
                        one_sum += (count / self.num_doc_biterm[doc_index]
                                    * weight / self.get_sum(biterm))
                    writer.write(str(one_sum) + " ")
                writer.write('\n')

    def save_phi(self):
        """Write per-topic word weights, one topic per line."""
        with self.get_file_writer(path='model-final-phi.txt') as writer:
            for topic_id in range(self.num_topic):
                for word_id in self.Id2Word:
                    calculation = ((self.topic_word_num[word_id][topic_id] + self.beta)
                                   / ((self.num_topic_biterm[topic_id] * 2)
                                      + (self.vocab_size * self.beta)))
                    writer.write(str(calculation) + ' ')
                writer.write('\n')

    def build_model(self):
        """Run num_iter collapsed Gibbs sweeps over all biterms."""
        for it in range(self.num_iter):
            start_time = time.time()
            for biterm_index, old_topic_id in enumerate(self.topic_biterm):
                word1 = self.biterms[biterm_index] // self.ENCODE
                word2 = self.biterms[biterm_index] % self.ENCODE
                # Remove the biterm's current assignment from the counters.
                self.topic_word_num[word1][old_topic_id] -= 1
                self.topic_word_num[word2][old_topic_id] -= 1
                self.num_topic_biterm[old_topic_id] -= 1
                # Sample a new topic proportionally to the conditional weights
                # via the cumulative distribution.
                p = [0.0] * self.num_topic
                for k in range(self.num_topic):
                    p[k] = ((self.num_topic_biterm[k] + self.alpha)
                            * (self.topic_word_num[word1][k] + self.beta)
                            * (self.topic_word_num[word2][k] + self.beta)
                            / ((2 * self.num_topic_biterm[k])
                               + (self.vocab_size * self.beta)) ** 2)
                for k in range(1, self.num_topic):
                    p[k] += p[k - 1]
                u = random.random() * p[-1]
                # Default to the last topic (the original's -1 sentinel
                # indexed the same element when no bucket matched).
                new_topic_id = self.num_topic - 1
                for k in range(self.num_topic):
                    if u < p[k]:
                        new_topic_id = k
                        break
                self.topic_word_num[word1][new_topic_id] += 1
                self.topic_word_num[word2][new_topic_id] += 1
                self.num_topic_biterm[new_topic_id] += 1
                self.topic_biterm[biterm_index] = new_topic_id
            print('Finished iteration:', it,
                  'Time taken:' + str(time.time() - start_time))

    def save_result(self):
        """Write all model artefacts (topic words, theta, word ids, phi)."""
        self.save_topic_words(20)
        self.save_theta()
        self.save_wordIds()
        self.save_phi()

    def run(self):
        """Full pipeline: load data, initialise, train, save results."""
        self.load_data()
        self.init_model()
        self.build_model()
        self.save_result()


if __name__ == "__main__":
    # Guarding the script body keeps the module importable (e.g. by tests or
    # a notebook) without immediately touching the data files on disk.
    build_filtered_text()
    btm = BTM(data_path='../Topic Modeling/filteredtext.txt', alpha=2,
              beta=0.001, num_iter=10, num_topic=10, output_dir='.')
    btm.run()
    # The original called btm.save_result() again after run(); run() already
    # saves everything, so the duplicate call is dropped.

# --- Thread replies --------------------------------------------------------
# Larz60+ (Jun-12-2018): it seems easier, and more logical, to get the code
# working outside of the Jupyter notebook first, and then add it back; you
# are complicating things by running it in Jupyter.  If you need help setting
# up an environment for this, ask.
# volcano63 (Jun-12-2018): I am just curious - why?
I use Jupyter all the time, locally and remotely, without a glitch. There are also free Azure notebooks. I am not sure what your problem is, but if your computer receives a dynamic IP, the Jupyter session may become unavailable - it often happens to me when I open a session in the office and then try to access it from home. You must kill the old session and start a new one. I usually launch Jupyter in "no-browser" mode and then open the produced link in a browser (I work on Linux): I click the link at the bottom - and voila! - the Jupyter session is ready.
RE: Jupyter error - 'The kernel appears to have died, it will restart automatically' - Larz60+ - Jun-12-2018 Quote: "why? I use Jupyter all the time, locally and remotely - without a glitch ... but if your computer receives a dynamic IP, the Jupyter session may become unavailable" - 'nuff said. RE: Jupyter error - 'The kernel appears to have died, it will restart automatically' - volcano63 - Jun-12-2018 (Jun-12-2018, 09:50 PM) Larz60+ Wrote: Quote: "why? I use Jupyter all the time, locally and remotely - without a glitch ... but if your computer receives a dynamic IP, the Jupyter session may become unavailable" - 'nuff said. So, if you don't know the answer, is "don't use it" a good answer?! PS: And since you obviously don't know what you are talking about - Jupyter sessions are auto-saved. RE: Jupyter error - 'The kernel appears to have died, it will restart automatically' - Larz60+ - Jun-12-2018 Be nice; I use Jupyter all the time.