It threw the following error
Thank you so much for your help, it is much appreciated! You saved me many hours of manual work.
The final code is as follows.
Error: writerow() takes exactly one argument (4 given)
which I fixed by adding "[" and "]" around the writerow arguments. Then it threw Error: NameError: name 'random' is not defined
which I fixed by adding "import random" and it now works. Thank you so much for your help, it is much appreciated! You saved me many hours of manual work.
The final code is as follows.
import csv
import random
import re
import string

# Maps every ASCII punctuation character to a space so punctuation acts as a
# word separator when a sentence is split into words.
_PUNCTUATION_TO_SPACE = str.maketrans(
    string.punctuation, ' ' * len(string.punctuation))

# Regex class for "a character that can be part of a word as find_cloze()
# sees it": anything that is neither whitespace nor ASCII punctuation.
_TOKEN_CHAR = r'[^\s' + re.escape(string.punctuation) + r']'


def find_cloze(sentence, frequency_list, max_frequency=20001):
    """Pick the word of *sentence* that should be turned into a cloze.

    The sentence is split on whitespace and ASCII punctuation.  Words that
    are all-uppercase or title-cased (treated as proper nouns) and words of
    two characters or fewer are ignored.  Among the remaining words, the one
    with the smallest value in *frequency_list* is chosen; only values
    strictly below *max_frequency* count.

    Args:
        sentence: The sentence text to analyse.
        frequency_list: Mapping of lowercase word -> frequency value (an int
            or a string convertible with ``int()``).
        max_frequency: Value assumed for words missing from
            *frequency_list*; also the exclusive upper bound for a word to
            be selected by frequency.  The default matches the original
            script's note: a 50k frequency list cut down to 20,000 entries.

    Returns:
        The selected word (with its original casing); a random eligible word
        if none had a frequency below *max_frequency*; or ``None`` if the
        sentence contains no eligible word.

    Raises:
        ValueError: If a looked-up frequency value cannot be converted to
            ``int``.
    """
    min_frequency = max_frequency
    min_word = None
    valid_words = []
    for word in sentence.translate(_PUNCTUATION_TO_SPACE).split():
        if word.isupper() or word.istitle():
            continue  # Skip proper nouns
        if len(word) <= 2:
            continue  # Skip tiny words
        valid_words.append(word)
        word_frequency = int(frequency_list.get(word.lower(), max_frequency))
        if word_frequency < min_frequency:
            min_word = word
            min_frequency = word_frequency
    if min_word:
        return min_word
    if valid_words:
        # No eligible word is in the frequency list: fall back to any of them.
        return random.choice(valid_words)
    return None


def _cloze_word(sentence, word):
    """Wrap each whole-word occurrence of *word* in *sentence* in {{c1::}}."""
    # Plain str.replace() would also hit *word* inside longer words (e.g.
    # "the" inside "other").  The lookarounds restrict matches to the same
    # word boundaries find_cloze() uses, so the chosen word always matches.
    pattern = ('(?<!' + _TOKEN_CHAR + ')' + re.escape(word) +
               '(?!' + _TOKEN_CHAR + ')')
    # A function replacement avoids backslash interpretation in the word.
    return re.sub(
        pattern, lambda match: '{{c1::' + match.group(0) + '}}', sentence)


def make_index(path, delimiter, value=1, encoding='utf-8'):
    """Build a dict from a delimited text file.

    Args:
        path: Path of the file to read.
        delimiter: Single-character column delimiter.
        value: Index of the column used as the dict value; column 0 is
            always the key.
        encoding: Text encoding of the file.

    Returns:
        Dict mapping each row's first column to its *value* column.  When a
        key occurs more than once, the last row wins.  Blank rows and rows
        with too few columns are skipped.
    """
    # Minimum row length for both row[0] and row[value] to exist.
    needed = value + 1 if value >= 0 else -value
    index = {}
    # NOTE(review): the reader uses the csv default quotechar ('"'), so a
    # field that starts with a double quote is parsed as a quoted field --
    # confirm this is what the input files expect.
    with open(path, newline='', encoding=encoding) as f:
        for row in csv.reader(f, delimiter=delimiter):
            if len(row) < needed:
                continue  # Blank or truncated line
            index[row[0]] = row[value]
    return index


def generate(target_file, native_file, links_file, frequency_file,
             output_file='out.csv'):
    """Write a tab-separated file of cloze sentences with translations.

    Each output row holds: target sentence number, target sentence with the
    cloze word wrapped as ``{{c1::word}}``, native sentence number, native
    sentence.  Target sentences without a linked native sentence, or without
    an eligible cloze word, are skipped.

    Args:
        target_file: Tab-separated file; column 0 is the sentence number and
            column 2 the sentence text (target language).
        native_file: Same layout as *target_file*, for the native language.
        links_file: Tab-separated file mapping a sentence number (column 0)
            to the number of its translation (column 1).
        frequency_file: Space-separated file mapping a word (column 0) to
            its frequency value (column 1).
        output_file: Path of the file to write.
    """
    print("Making indexes ...")
    target = make_index(target_file, '\t', value=2)
    native = make_index(native_file, '\t', value=2)
    # NOTE(review): only one link per sentence number is kept (the last one
    # in the file); if a sentence has several links, confirm that the links
    # file is pre-filtered to the native language.
    links = make_index(links_file, '\t')
    # Make index between word and usage frequency
    frequency = make_index(frequency_file, ' ')

    print("Generating clozes ...")
    with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(
            outfile, delimiter='\t', quotechar='|',
            quoting=csv.QUOTE_MINIMAL)
        # For each target sentence
        for target_number, target_sentence in target.items():
            # Lookup native translation
            native_number = links.get(target_number)
            if not native_number:
                continue  # If no native translation, skip
            native_sentence = native.get(native_number)
            if not native_sentence:
                continue  # If no native translation, skip
            # Find the cloze word
            target_cloze_word = find_cloze(target_sentence, frequency)
            if not target_cloze_word:
                continue  # If no cloze word, skip
            clozed = _cloze_word(target_sentence, target_cloze_word)
            writer.writerow(
                [target_number, clozed, native_number, native_sentence])
    print("Done.")


if __name__ == '__main__':
    generate('target.csv', 'native.csv', 'links.csv', 'frequency.txt')

# Edit: Changed code to be "language agnostic".