Python Forum
Inconsistent behaviour in output - web scraping
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Inconsistent behaviour in output - web scraping
#7
Snippsat is correct; I should have used Selenium from the start.

Here's a version that will get the header_info that you were looking for.
You can use BeautifulSoup to extract the details from that.

from selenium import webdriver
from bs4 import BeautifulSoup
import time
import os
import sys


class GetQuotes:
    """Load a Yahoo Finance quote page in Firefox and return its header block.

    The page is rendered in a real browser (Selenium) and the resulting page
    source is parsed with BeautifulSoup. Each call to ``get_quote`` starts a
    fresh browser and shuts it down again before returning.
    """

    def __init__(self):
        # Use the directory containing this script as the working directory.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))
        # No browser is running until start_browser() is called.
        self.browser = None

    def get_quote(self, symbol, wait_seconds=4):
        """Return the ``quote-header-info`` element for *symbol*.

        Args:
            symbol: Ticker symbol as used in the Yahoo Finance URL,
                e.g. ``'IOZ.AX'``.
            wait_seconds: Seconds to sleep after requesting the page before
                reading its source. Defaults to 4.

        Returns:
            Whatever ``soup.find('div', id='quote-header-info')`` yields:
            the matching element, or ``None`` when the page contains no
            such element.
        """
        url = f"https://finance.yahoo.com/quote/{symbol}"
        self.start_browser()
        try:
            self.browser.get(url)
            # Fixed pause before the page source is captured.
            time.sleep(wait_seconds)
            page = self.browser.page_source
        finally:
            # Shut the browser down even when loading the page failed,
            # otherwise the Firefox/driver processes are left running.
            self.stop_browser()

        soup = BeautifulSoup(page, 'lxml')
        header_info = soup.find('div', id='quote-header-info')

        # Extract your info from header_info as needed; it is returned so
        # callers can do that themselves.
        return header_info

    def start_browser(self):
        """Start a Firefox WebDriver session and store it on ``self.browser``."""
        # Work on a copy: DesiredCapabilities.FIREFOX is a class-level dict,
        # so editing it in place would change it for every other user.
        caps = webdriver.DesiredCapabilities.FIREFOX.copy()
        caps["marionette"] = True
        # NOTE(review): recent Selenium releases appear to have dropped the
        # ``capabilities=`` keyword in favour of ``options=`` - confirm
        # against the Selenium version this script is pinned to.
        self.browser = webdriver.Firefox(capabilities=caps)

    def stop_browser(self):
        """End the WebDriver session, if one is running.

        Safe to call when no browser was started. Uses ``quit()`` rather
        than ``close()`` so the whole session ends, not just the window.
        """
        browser = getattr(self, "browser", None)
        if browser is None:
            return
        try:
            browser.quit()
        finally:
            self.browser = None

def main(argv):
    """Fetch the quote page for the symbol named on the command line.

    Args:
        argv: Argument list in ``sys.argv`` form; ``argv[1]``, when present,
            is the ticker symbol. Without it, ``'IOZ.AX'`` is used.
    """
    # Fall back to the default symbol when none was supplied.
    symbol = argv[1] if len(argv) > 1 else 'IOZ.AX'
    GetQuotes().get_quote(symbol)


# Run only when executed as a script (not on import); pass the raw
# command-line arguments through to main().
if __name__ == '__main__':
    main(sys.argv)
This is what's contained in header_info:
Output:
<div class="quote-header-section Cf Pos(r) Mb(5px) Bgc($lv2BgColor) Maw($maxModuleWidth) Miw($minGridWidth) smartphone_Miw(ini) Miw(ini)!--tab768 Miw(ini)!--tab1024 Mstart(a) Mend(a) Px(20px) smartphone_Pb(0px) smartphone_Mb(0px)" data-reactid="2" data-test="quote-header" data-yaft-module="tdv2-applet-QuoteHeader" id="quote-header-info"> <div class="W(100%) Bdts(s) Bdtw(7px) Bdtc($negativeColor)" data-reactid="3"> </div> <div class="Mt(15px)" data-reactid="4"> <div class="D(ib) Mt(-5px) Mend(20px) Maw(56%)--tab768 Maw(52%) Ov(h) smartphone_Maw(85%) smartphone_Mend(0px)" data-reactid="5"> <div class="D(ib)" data-reactid="6"> <h1 class="D(ib) Fz(18px)" data-reactid="7"> iShares Core S&amp;P/ASX 200 ETF (IOZ.AX) </h1> </div> <div class="C($tertiaryColor) Fz(12px)" data-reactid="8"> <span data-reactid="9"> ASX - ASX Delayed Price. Currency in AUD </span> </div> </div> <div class="D(ib) Va(t) Mend(15px) smartphone_Mend(0px) smartphone_Fl(end) smartphone_Mt(0px)" data-reactid="10"> <div class="qsp-watchlist-add Td(u):h Pos(r)" data-reactid="11" data-test="dropdown"> <div class="Pos(r) D(ib) Cur(p)" data-reactid="12" tabindex="0"> <div class="addButton Cur(p) Pstart(13px) Pend(16px) Pt(5px) Pb(7px) Fz(12px) Fw(500) C($tertiaryColor) Bd Bdc($linkColor) Bdrs(15px) Bgc($linkColor):h C(white):h" data-reactid="13"> <svg class="Mend(5px) addButton:h_Stk(white)! addButton:h_Fill(white)! 
Cur(p)" data-icon="star" data-reactid="14" height="16" style="fill:#0081f2;stroke:#0081f2;stroke-width:0;vertical-align:bottom;" viewbox="0 0 24 24" width="16"> <path d="M8.485 7.83l-6.515.21c-.887.028-1.3 1.117-.66 1.732l4.99 4.78-1.414 6.124c-.2 1.14.767 1.49 1.262 1.254l5.87-3.22 5.788 3.22c.48.228 1.464-.097 1.26-1.254l-1.33-6.124 4.962-4.78c.642-.615.228-1.704-.658-1.732l-6.486-.21-2.618-6.22c-.347-.815-1.496-.813-1.84.003L8.486 7.83zm7.06 6.05l1.11 5.11-4.63-2.576L7.33 18.99l1.177-5.103-4.088-3.91 5.41-.18 2.19-5.216 2.19 5.216 5.395.18-4.06 3.903z" data-reactid="15"> </path> </svg> <span class="D(n)--tab768 Mend(1px) Va(tb)" data-reactid="16"> <span data-reactid="17"> Add to watchlist </span> </span> </div> </div> </div> </div> <!-- react-empty: 18 --> <div class="D(ib) Fl(end) W(300px) Cl(end)--mobxl W(250px)--tab768" data-reactid="19"> <div class="Pos(r) D(ib) Mend(10px) Va(m) W(100%)" data-reactid="20" data-test="add-symbol-overlay" data-yaft-module="tdv2-applet-SymbolLookup"> <div class="clear-button-inside Pos(r) react-autocomplete-box" data-reactid="21"> <div class="Cf" data-reactid="22"> <fieldset class="Pos(r) D(ib) W(100%)" data-reactid="23"> <input aria-label="Quote Lookup" autocapitalize="none" autocomplete="off" autocorrect="off" class="Bdrs(0) Bxsh(n)! Fz(s) Bxz(bb) D(ib) Bg(n) Pend(5px) Px(8px) Py(0) H(30px) Lh(30px) Bd O(n):f O(n):h Bdc($seperatorColor) Bdc($linkColor):f Bdc($c-fuji-punch-a):inv C($negativeColor):in M(0) Pstart(10px) Bxz(bb) Bgc(white) W(100%) H(32px)! Lh(32px)! Ff($yahooSansFinanceFont)" data-reactid="24" name="s" placeholder="Quote Lookup" spellcheck="false" tabindex="1" type="text"/> </fieldset> <button class="Bdrs(2px) Td(n) Fz(s) D(ib) Bxz(bb) Py(0) Px(10px) H(30px) Lh(30px) Bd Bgc($linkColor) Bgc($linkActiveColor):h C(white) C(#aaa):di Bdc($linkColor) Bdc($seperatorColor):di Bg($seperatorColor):di H(32px)! Lh(n)! 
Va(m) Pos(a) Fl(end) End(1px)" data-reactid="25" type="submit"> <svg class="Fill(white) Stroke(white) Cur(p)" data-icon="search" data-reactid="26" height="20" style="stroke-width:0;vertical-align:bottom;" viewbox="0 0 24 24" width="20"> <path d="M9 3C5.686 3 3 5.686 3 9c0 3.313 2.686 6 6 6s6-2.687 6-6c0-3.314-2.686-6-6-6m13.713 19.713c-.387.388-1.016.388-1.404 0l-7.404-7.404C12.55 16.364 10.85 17 9 17c-4.418 0-8-3.582-8-8 0-4.42 3.582-8 8-8s8 3.58 8 8c0 1.85-.634 3.55-1.69 4.905l7.403 7.404c.39.386.39 1.015 0 1.403" data-reactid="27"> </path> </svg> </button> </div> <!-- react-text: 28 --> <!-- /react-text --> </div> </div> </div> </div> <div class="My(6px) Pos(r) smartphone_Mt(6px)" data-reactid="29"> <div class="D(ib) Va(m) Maw(65%) Ov(h)" data-reactid="30"> <div class="D(ib) Mend(20px)" data-reactid="31"> <span class="Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(ib)" data-reactid="32"> 30.39 </span> <span class="Trsdu(0.3s) Fw(500) Pstart(10px) Fz(24px) C($negativeColor)" data-reactid="33"> -0.32 (-1.04%) </span> <div class="C($tertiaryColor) D(b) Fz(12px) Fw(n) Mstart(0)--mobpsm Mt(6px)--mobpsm" data-reactid="34" id="quote-market-notice"> <span data-reactid="35"> As of 11:10AM AEST. Market open. </span> </div> </div> </div> <div class="Pos(r) Z(5) D(ib) Mstart(30px) Va(t) uba-container" data-reactid="36"> <div class="uba-container D-n D(n)" data-reactid="37" id="defaultTRADENOW-sizer"> <!-- react-text: 38 --> <!-- /react-text --> <div class="" data-reactid="39" id="defaultTRADENOW-wrapper"> <div class="" id="defaultdestTRADENOW" style=""> </div> </div> </div> </div> </div> </div>
Reply


Messages In This Thread
RE: Inconsistent behaviour in output - web scraping - by Larz60+ - Sep-20-2021, 01:54 AM

Possibly Related Threads…
Thread Author Replies Views Last Post
  logger behaviour setdetnet 1 907 Apr-15-2023, 05:20 AM
Last Post: Gribouillis
  can someone explain this __del__ behaviour? rjdegraff42 1 744 Apr-12-2023, 03:25 PM
Last Post: deanhystad
  Asyncio weird behaviour vugz 2 1,277 Apr-09-2023, 01:48 AM
Last Post: vugz
  Read csv file with inconsistent delimiter gracenz 2 1,214 Mar-27-2023, 08:59 PM
Last Post: deanhystad
  Weird behaviour using if statement in python 3.10.8 mikepy 23 3,676 Jan-18-2023, 04:51 PM
Last Post: mikepy
  Inconsistent loop iteration behavior JonWayn 2 1,010 Dec-10-2022, 06:49 AM
Last Post: JonWayn
  Generator behaviour bla123bla 2 1,119 Jul-26-2022, 07:30 PM
Last Post: bla123bla
  ValueError: Found input variables with inconsistent numbers of samples saoko 0 2,493 Jun-16-2022, 06:59 PM
Last Post: saoko
  Loop Dict with inconsistent Keys Personne 1 1,621 Feb-05-2022, 03:19 AM
Last Post: Larz60+
  Inconsistent counting / timing with threading rantwhy 1 1,779 Nov-24-2021, 04:04 AM
Last Post: deanhystad

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020