Python Forum

Full Version: Read All Emails from Outlook and add the word counts to a DataFrame
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
import win32com.client
import os
import time
import datetime as dt
import re
import pandas as pd

DateFilter = dt.datetime.now() - dt.timedelta(days = 2)

#Outlook MAPI
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
#Inbox Folder
inbox = outlook.GetDefaultFolder(6)
root_folder = outlook.Folders.Item(1)
print(root_folder.Name)
#Sort emails in inbox
messages = inbox.Items
messages.Sort("[ReceivedTime]", True)
#Filter emails to go through
DateFilterMsg = messages.Restrict("[ReceivedTime] >= '" + DateFilter.strftime('%m/%d/%Y %H:%M %p')+"'")

for message in messages:
if message.Class==43: # Get COM Objects.
body = message.HTMLBody.lower()
content = ('Body: {}'.format(body))#format
words = message.Body.split(" ")#split
words.sort()#sort
uniquewords = list((word,words.count(word)) for word in set(words))
df = pd.DataFrame(uniquewords, columns = ['word_name','word_cnt']) #Word Name and respective Counts
print(df)
This is how get the output, May be it is breaking the word counts by email. I want to see all the word counts in one dataFrame and not by breakUp. For example word system should give me a count of 2 and not two separate tables.

Output:
Index word_name word_cnt 0 System 1 Index word_name word_cnt 0 System 1