Python Forum

Full Version: Open and read multiple text files and match words
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi,

How can I create a script that reads two text files and prints out words that match within text file number 1?
The code below is the furthest I got: it can match words in a string and print them out, but I need it to read two or more large text files and print the words they have in common. Thank you.

import re

def get_words_from_string(s):
    """Return the set of distinct, lowercased words found in ``s``.

    A word is a maximal run of regex "word" characters (letters, digits
    and underscore); everything else acts as a separator.

    Args:
        s: The text to tokenize.

    Returns:
        A ``set`` of the lowercased words; empty when ``s`` has no words.
    """
    # Raw string: a plain '\w' is an invalid string escape that newer
    # Python versions warn about. re.findall accepts the pattern text
    # directly, so no explicit re.compile is needed.
    return set(re.findall(r'\w+', s.lower()))

def get_words_from_file(fname):
    """Return the set of distinct, lowercased words contained in a file.

    Args:
        fname: Path of the text file to read.

    Returns:
        Whatever ``get_words_from_string`` returns for the file's full text.
    """
    # Open in text mode: a binary-mode read yields bytes on Python 3, and
    # the str regex used by get_words_from_string cannot be applied to a
    # bytes object (it raises TypeError).
    with open(fname, 'r') as inf:
        return get_words_from_string(inf.read())

def all_words(needle, haystack):
    """Return True when every item of ``needle`` also occurs in ``haystack``."""
    wanted = set(needle)
    available = set(haystack)
    # Subset comparison: True only if nothing in `wanted` is missing.
    return wanted <= available

def any_words(needle, haystack):
    """Return the set of items present in both ``needle`` and ``haystack``.

    The result is empty (and therefore falsy) when nothing is shared.
    """
    wanted = set(needle)
    available = set(haystack)
    return wanted & available

# Demo: tokenize a short query and a longer sentence into word sets.
search_words, find_in = (
    get_words_from_string(text)
    for text in ("this my test", "If this were my test, I is passing")
)

# Only the query's word set is printed; find_in is built but not shown.
print(search_words)
This can be compacted by using a list comprehension, but it will do the job:
import os

def get_words(filename):
    """Return the distinct whitespace-separated words of a text file.

    Words are taken exactly as they appear (case and punctuation are
    kept) and are listed in the order of their first occurrence.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of unique words; empty when the file has no words.
    """
    wordlist = []
    # Companion set gives O(1) membership tests; checking the list itself
    # would make the whole scan quadratic on large files.
    seen = set()
    with open(filename) as fp:
        for line in fp:
            # split() with no argument already ignores leading/trailing
            # whitespace, so no separate strip() is required.
            for item in line.split():
                if item not in seen:
                    seen.add(item)
                    wordlist.append(item)
    return wordlist

def find_common_words(filename1, filename2):
    """Print and return the words that occur in both files.

    Each file is tokenized with ``get_words``; the comparison is an exact
    match on those tokens.

    Args:
        filename1: Path of the first text file.
        filename2: Path of the second text file.

    Returns:
        The set of words common to both files (also printed to stdout).
    """
    matching_words = set(get_words(filename1)) & set(get_words(filename2))
    print(matching_words)
    # Returned as well as printed so callers can use the result directly.
    return matching_words

def testit():
    """Run find_common_words on 'words1.txt' and 'words2.txt'.

    The working directory is first switched to the directory containing
    this script, so the two relative file names resolve next to the code.
    """
    script_dir = os.path.dirname(__file__)
    os.chdir(os.path.abspath(script_dir))
    find_common_words('words1.txt', 'words2.txt')

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    testit()
Thank you my man, love you.
I see my verbatim response on Stack Overflow (https://stackoverflow.com/questions/tagged/python). How nice.