First of all, you can do this in many ways.
Facebook is unusual in that it takes extra precautions to stop bots that it does not allow. That means anything that is not using
their API.
Don't take this too much to heart; it has been sitting in my GitHub repo since 2014 and might be outdated by now. It uses Python with no third-party modules, but it requires a lot of work navigating around Facebook.
#!/usr/bin/env python3
#get facebook login html
import urllib, http.cookiejar, re, os, sys, getpass
class Facebook():
    """Minimal Facebook login client built only on the standard library.

    Holds the credentials and a cookie-aware ``urllib`` opener whose default
    headers imitate a desktop Firefox browser.
    """

    def __init__(self, email, password):
        """Store the credentials and build the cookie-aware opener.

        Args:
            email: E-mail address submitted in the login form.
            password: Password submitted in the login form.
        """
        # Import the submodule explicitly instead of relying on
        # ``http.cookiejar`` having loaded ``urllib.request`` as a side effect.
        import urllib.request

        self.email = email
        self.password = password
        cj = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
        opener.addheaders = [
            ('Referer', 'http://login.facebook.com/login.php'),
            ('Content-Type', 'application/x-www-form-urlencoded'),
            ('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 (.NET CLR 3.5.30729)'),
        ]
        self.opener = opener

    def send(self):
        """Placeholder; not implemented."""
        pass

    def recv(self):
        """Placeholder; not implemented."""
        pass

    def _login_payload(self):
        """Return the URL-encoded login form body as ASCII bytes."""
        from urllib.parse import urlencode

        fields = {
            'locale': 'en_US',
            'non_com_login': '',
            'email': self.email,
            'pass': self.password,
            'lsd': '20TOl',
        }
        # urlencode percent-escapes reserved and non-ASCII characters, so
        # credentials containing '&', '+', '=' or accents are sent intact and
        # the result is always safe to encode as ASCII.
        return urlencode(fields).encode('ascii')

    def login(self):
        """Post the credentials to the login endpoint and report the result.

        Prints ``Logged in.`` followed by the returned page when the page
        contains ``Logout``. Otherwise prints ``failed login`` followed by
        the returned page and exits the interpreter.

        Raises:
            SystemExit: When the returned page does not contain ``Logout``.
        """
        url = 'https://login.facebook.com/login.php?login_attempt=1'
        data = self._login_payload()
        # Visit the front page first; presumably this lets the cookie jar
        # pick up the initial session cookies before the login POST.
        self.opener.open('http://www.facebook.com').close()
        with self.opener.open(url, data) as usock:
            page = usock.read().decode()
        if "Logout" in page:
            print("Logged in.")
            print(page)
        else:
            print("failed login")
            # The response body was already consumed above; print what was
            # read rather than reading the exhausted stream again.
            print(page)
            sys.exit()
# Interactive entry point: ask for the e-mail first, then the password
# (read without echo via getpass), and attempt the login straight away.
credentials = (input('Email:'), getpass.getpass('Password:'))
Facebook(*credentials).login()
Then there is Selenium, which controls a browser for you. It can run headless, by the way. It will always work as long as it is kept updated, because it can handle JavaScript/jQuery, which is where it is unique: every other method fails at that.
from selenium import webdriver
import time
import os
# Page that App.login() opens to find the login form.
URL = 'https://www.facebook.com/'
# Path handed to webdriver.Chrome in App.setup_chrome() (machine-specific).
CHROMEPATH = '/home/metulburr/chromedriver'
# Path handed to webdriver.PhantomJS in App.setup_headless() (machine-specific).
PHANTOMPATH = '/home/metulburr/phantomjs'
# Credentials typed into the login form by App.login(); empty by default.
EMAIL = ''
PASSWORD = ''
class App:
    """Drive a browser through Facebook login and a few navigation steps.

    Constructing an instance runs the whole scripted session: start Chrome,
    log in, click through to the home and friends views, print the profile
    name, scroll to the bottom of the page, and then idle so the rendered
    page can be inspected.
    """

    def __init__(self):
        """Run the scripted session from browser start-up to the final wait."""
        from selenium.webdriver.common.by import By

        self.setup_chrome()
        # Call self.setup_headless() here instead to use the PhantomJS setup.
        self.login()
        self.to_home()
        self.to_friends()

        # test
        # find_element(By.ID, ...) replaces find_element_by_id, which newer
        # Selenium releases no longer provide.
        friend_name = self.browser.find_element(By.ID, 'fb-timeline-cover-name')
        print(friend_name.text)
        more_about = f'More About {friend_name.text}'
        print(more_about)

        self.scroll_to_bottom()
        print('done')
        time.sleep(100000)  # keep alive to view html

    def scroll_to_bottom(self, pause_time=0.5):
        """Scroll down repeatedly until the page height stops growing.

        Args:
            pause_time: Seconds to wait after each scroll so newly requested
                content can load before the height is measured again.
        """
        driver = self.browser
        # Get scroll height
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            # Scroll down to bottom
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Wait to load page
            time.sleep(pause_time)
            # Calculate new scroll height and compare with last scroll height
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

    def delay(self, seconds=3):
        """Sleep for ``seconds`` (default 3) to give the browser time to settle."""
        time.sleep(seconds)

    def chrome_prep(self):
        '''get rid of asking to save password and notifications popup'''
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_experimental_option(
            'prefs', {
                'credentials_enable_service': False,
                "profile.default_content_setting_values.notifications": 2,
                'profile': {
                    'password_manager_enabled': False
                }
            }
        )
        return chrome_options

    def setup_chrome(self):
        """Start Chrome with the prepared options and store it as ``self.browser``."""
        options = self.chrome_prep()
        os.environ["webdriver.chrome.driver"] = CHROMEPATH
        # NOTE(review): the positional driver path and the ``chrome_options``
        # keyword are the older constructor form; confirm against the
        # installed Selenium version before upgrading.
        self.browser = webdriver.Chrome(CHROMEPATH, chrome_options=options)
        self.browser.set_window_position(0, 0)
        self.delay()

    def setup_headless(self):
        """Start PhantomJS and store it as ``self.browser``."""
        # NOTE(review): PhantomJS support may be missing from current Selenium
        # releases; confirm before relying on this path.
        self.browser = webdriver.PhantomJS(PHANTOMPATH)
        self.delay()

    def login(self):
        """Open ``URL``, type the credentials into the form and submit it."""
        from selenium.webdriver.common.by import By

        self.browser.get(URL)
        time.sleep(1)
        username = self.browser.find_element(By.ID, "email")
        password = self.browser.find_element(By.ID, "pass")
        username.send_keys(EMAIL)
        password.send_keys(PASSWORD)
        login_attempt = self.browser.find_element(By.XPATH, "//*[@type='submit']")
        login_attempt.submit()
        self.delay()

    def to_home(self):
        """Click the first element with classes ``linkWrap noCount`` via JavaScript."""
        self.browser.execute_script("document.getElementsByClassName('linkWrap noCount')[0].click()")
        self.delay()

    def to_friends(self):
        """Click the third element with class ``_6-6`` via JavaScript."""
        self.browser.execute_script("document.getElementsByClassName('_6-6')[2].click()")
        self.delay()
# Constructing App runs the entire scripted browser session (see App.__init__).
App()
Then you can always go the way Facebook wants you to go: use their API. I don't have any code for this, because I don't use their API.
http://facebook-sdk.readthedocs.io/en/latest/api.html
I would say the best method is to use Selenium. It bypasses all of Facebook's bot-catching code meant to trip you up, and it will keep working in the future (even if it needs updating here and there).
Either way, you should be fluent in scraping websites, as you will be navigating HTML/CSS.
https://python-forum.io/Thread-Web-Scraping-part-1
https://python-forum.io/Thread-Web-scraping-part-2