Nov-02-2018, 01:50 PM
I'm conducting a data science study in which I need to review tweets. I've set up a streamer that can get live tweets and save them to a csv file. However to review the tweets I need them in a pandas data frame. I have already made a class object that converts a list containing the incoming tweets to a data frame.
I now need the tweet streamer to append the incoming tweets to a list so that this list can be used by the data frame converter class object.
Can I do this within the on_data function so each list is being passed to the data frame converter as it comes in.
Or do I need to create a new function that opens the csv and creates a list from that before passing it to the data frame converter.
English isnt my first language and I'm fairly new to python so sorry for any errors.
I now need the tweet streamer to append the incoming tweets to a list so that this list can be used by the data frame converter class object.
Can I do this within the on_data function so each list is being passed to the data frame converter as it comes in.
Or do I need to create a new function that opens the csv and creates a list from that before passing it to the data frame converter.
English isnt my first language and I'm fairly new to python so sorry for any errors.
from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream from tweepy import API from tweepy import Cursor import twitter_credentials import numpy as np import pandas as pd # # # # TWITTER STREAMER # # # # class TwitterStreamer(): """ Class for streaming and processing live tweets. """ def __init__(self): pass def stream_tweets(self, retrieve_tweets_filename, tweet_subject): # This handles Twitter authetification and the connection to Twitter Streaming API listener = StdOutListener(retrieve_tweets_filename) auth = OAuthHandler(twitter_credentials.consumer_key, twitter_credentials.consumer_secret) auth.set_access_token(twitter_credentials.access_token, twitter_credentials.access_token_secret) stream = Stream(auth, listener) # This line filter Twitter Streams to capture data by the keywords: stream.filter(track=tweet_subject) # # # # TWITTER STREAM LISTENER # # # # class StdOutListener(StreamListener): """ This is a basic listener that just prints received tweets to stdout. """ def __init__(self, retrieve_tweets_filename): self.retrieve_tweets_filename = retrieve_tweets_filename def on_data(self, data): #saves incoming tweets to csv file retrieve_tweets_filename try: print(data) #prints out incoming tweets with open(self.retrieve_tweets_filename, 'a') as tf: tf.write(data) return True except BaseException as e: print("Error on_data %s" % str(e)) return True #In addition to saving as a CSV files I need to append these incoming tweets to a list tweets. Can I do this continuously # or is it easier to create a new function that opens the csv and generates a list from that def on_error(self, status): print(status) class TweetAnalyzer(): """ Functionality for analyzing and categorizing content from tweets. """ def tweets_to_data_frame(self, tweets): df = pd.DataFrame(data=[tweets.tex for tweet in tweets], columns=['Tweets']) #df['id'] = np.array([tweet.id for tweet in tweets]) df['date'] = np.array([tweet.created_at for tweet in tweets], columns=['Date']) df['source'] = np.array([tweet.source for tweet in tweets], columns=['Twitter Handle']) df['likes'] = np.array([tweet.favorite_count for tweet in tweets], columns=['Number of likes']) df['retweets'] = np.array([tweet.retweet_count for tweet in tweets], columns=['Number of reweets']) df['len'] = np.array([len(tweet.text) for tweet in tweets], columns=['Character length']) return df if __name__ == '__main__': # Authenticate using config.py and connect to Twitter Streaming API. tweet_subject = ["help", "me"] retrieved_tweets_filename = "tweets.csv" tweet_analyzer = TweetAnalyzer() df = tweet_analyzer.tweets_to_data_frame(tweets) print(df())Full explanation and the code can be found here https://stackoverflow.com/questions/5310...nto-a-list