Dec-15-2022, 06:26 PM
I have a simple program in Tkinter that I'm using to scrape the age of various people from wikipedia just for webscraping practice.
I am able to scrape the age of each person one-by-one on one thread, but I'm trying to have one thread for each person to handle scraping their ages all at the same time so that the program will be much faster.
So, in other words, the program currently scrapes only 1 person at a time and can only return 1 row at a time in the Treeview, but I'd like to have it to where a thread works for each person at the same time (concurrently) so that the Treeview will return each person's age in one shot as well.
Here's the code that I've come up with so far:
So, how exactly do I create multiple threads to handle each person in the list and return them concurrently/simultaneously instead of row-by-row, one at a time?
I'd appreciate any support.
I am able to scrape the age of each person one-by-one on one thread, but I'm trying to have one thread for each person to handle scraping their ages all at the same time so that the program will be much faster.
So, in other words, the program currently scrapes only 1 person at a time and can only return 1 row at a time in the Treeview, but I'd like to have it to where a thread works for each person at the same time (concurrently) so that the Treeview will return each person's age in one shot as well.
Here's the code that I've come up with so far:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
from tkinter import Tk, Button, Listbox from tkinter import ttk import threading import requests import re #imports RegEx class MainWindow(Tk): def __init__( self ): super ().__init__() self .option_add( "*Font" , "poppins, 11 bold" ) self .lb1 = Listbox( self , width = 22 , cursor = 'hand2' ) self .lb1.pack(side = 'left' , fill = 'y' , padx = 20 , pady = 20 ) #create list of names names = [ 'Adam Levine' , 'Arnold Schwarzenegger' , 'Artur Beterbiev' , 'Chris Hemsworth' , 'Dan Henderson' , 'Dustin Poirier' , 'Fedor Emelianenko' , 'Gennady Golovkin' , 'Igor Vovchanchyn' , 'Ken Shamrock' , 'Mirko Cro Cop' , 'Oleksandr Usyk' , 'Ronnie Coleman' , 'Vasiliy Lomachenko' ] #populate listbox with names for name in names: self .lb1.insert( 'end' , name) self .tv1 = ttk.Treeview( self , show = 'tree headings' , cursor = 'hand2' ) columns = ( 'NAME' , 'AGE' ) self .tv1.config(columns = columns) style = ttk.Style() style.configure( "Treeview" , highlightthickness = 2 , bd = 0 , rowheight = 26 ,font = ( 'Poppins' , 11 )) # Modify the font of the body style.configure( "Treeview.Heading" , font = ( 'Poppins' , 12 , 'bold' )) # Modify the font of the headings #configure headers self .tv1.column( '#0' , width = 0 , stretch = 0 ) self .tv1.column( 'NAME' , width = 190 ) self .tv1.column( 'AGE' , width = 80 , stretch = 0 ) #define headings self .tv1.heading( 'NAME' , text = 'NAME' , anchor = 'w' ) self .tv1.heading( 'AGE' , text = 'AGE' , anchor = 'w' ) self .tv1.pack(fill = 'both' , expand = 1 , padx = ( 0 , 20 ), pady = 20 ) #create start button self .b1 = Button( self , text = 'START' , bg = 'green' , fg = 'white' , cursor = 'hand2' , command = self .start) self .b1.pack(pady = ( 0 , 20 )) #scrape data from WikiPedia.org def start( self ): for item in self .tv1.get_children(): self .tv1.delete(item) t1 = threading.Thread(target = self .scrape_wiki, daemon = True ) t1.start() def scrape_wiki( self ): for i in range ( self .lb1.size()): #select the name self .name = self .lb1.get(i).replace( ' ' , '_' ) # create a simple dictionary to hold the user agent inside of the headers headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0' } # the required first parameter of the 'get' method is the 'url': # regex setup regex = re.search( '(age \d+)' , r.text) age = regex.group( 0 ).replace( 'age ' , ' ').replace(' ) ', ' ') # Populate Treeview with row data self .name = self .name.replace( '_' , ' ' ) self .tv1.insert(parent = ' ', index=' end', iid = i, values = ( self .name, age)) if __name__ = = '__main__' : app = MainWindow() #app.iconbitmap('imgs/logo-icon.ico') app.title( 'Main Window' ) app.configure(bg = '#333333' ) #center the Main Window: w = 600 # Width h = 520 # Height screen_width = app.winfo_screenwidth() # Width of the screen screen_height = app.winfo_screenheight() # Height of the screen # Calculate Starting X and Y coordinates for Window x = (screen_width / 2 ) - (w / 2 ) y = (screen_height / 2 ) - (h / 2 ) app.mainloop() |
I'd appreciate any support.