So after three-odd weeks of trying to find some time, I finally managed to get this problem set completed. The code below does the following things:
1) Opens A Firefox Instance,
2) Scrolls to the End of the Page
3) Reads the total number of overs a team has played
4) Counts the total number of dot balls
5) Plots the results as a stacked bar chart and a pie chart
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
# Open a Firefox instance on the ball-by-ball commentary page (first innings).
driver = webdriver.Firefox()
try:
    driver.get('https://www.espncricinfo.com/series/8039/commentary/65234/australia-vs-pakistan-final-icc-world-cup-1999?innings=1')
    SCROLL_PAUSE_TIME = 2
    # The locator strategy is given as the plain string "tag name" (the value
    # of By.TAG_NAME): the find_element_by_* helpers were removed in
    # Selenium 4.3, while find_element(by, value) exists in Selenium 3 and 4.
    elm = driver.find_element('tag name', 'html')
    # Get scroll height
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        # Scroll down to almost the bottom of the page
        driver.execute_script("window.scrollTo(0, (document.body.scrollHeight-400));")
        # Time taken to load the page
        time.sleep(SCROLL_PAUSE_TIME)
        # Scrolling up & down to load more data
        elm.send_keys(Keys.HOME)
        time.sleep(1)
        elm.send_keys(Keys.END)
        time.sleep(1)
        # Stop once a full scroll cycle no longer makes the page any taller
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    # Save the entire source of the page (page_source is already a str)
    testVariable = driver.page_source
finally:
    # Always shut the browser down, even if loading or scrolling failed;
    # otherwise the Firefox/geckodriver processes are left running.
    driver.quit()
# Keep only the markup that precedes the embedded window.__INITIAL_STATE__
# script data (the marker itself is re-appended), so the string searches below
# run on the part of the page before that marker only.
vanillaData = testVariable.split('window.__INITIAL_STATE__ = {', 1)[0] + 'window.__INITIAL_STATE__ = {'
#------------------------------------------------------------------X-------------------------------------------------------------X
# Count the dot balls: every occurrence of the commentary phrase below in the
# captured markup is treated as one delivery with no run scored.
dots = ', no run,'
totalDots = vanillaData.count(dots)
print("Total Dot Balls Faced = ", totalDots, sep="")
# Finding Total Number of Balls/Overs In Innings
def _read_overs_text(page_html, marker='<div class="time-stamp" data-reactid="'):
    """Return the text of the first time-stamp <div> in page_html, e.g. '49.6'.

    The text is read from the end of the opening tag up to the next '<', so
    the length of the data-reactid value and the number of digits in the
    overs figure do not matter.

    Raises:
        ValueError: if no time-stamp element (or no text after it) is found.
    """
    start = page_html.find(marker)
    if start == -1:
        raise ValueError("No time-stamp element found in the page source")
    tag_end = page_html.find('>', start + len(marker))
    text_end = page_html.find('<', tag_end + 1) if tag_end != -1 else -1
    if text_end == -1:
        raise ValueError("Time-stamp element in the page source is incomplete")
    return page_html[tag_end + 1:text_end].strip()


# Overs figure in "overs.balls" form, taken from the first time-stamp element
# found in the captured markup.
sumOvers = _read_overs_text(vanillaData)
overs, _, balls = sumOvers.partition('.')
try:
    convert_overs = int(overs)
    convert_balls = int(balls)
except ValueError as err:
    raise ValueError("Unexpected overs value in page source: " + repr(sumOvers)) from err
totalBalls = (convert_overs * 6) + convert_balls
conVersion_totalBalls = str(totalBalls)
print("Total Deliveries Faced = " + conVersion_totalBalls)
if convert_balls == 6:
    # "N.6" means the over is complete, so report it as N + 1 whole overs.
    # NOTE: totalOvers is an int in this branch and a str in the other one,
    # exactly as before; normalise it before doing arithmetic with it.
    totalOvers = convert_overs + 1
    conVersion_totalOvers = float(totalOvers)
    print("Total Overs Faced = " + str(conVersion_totalOvers))
else:
    totalOvers = str(sumOvers)
    print("Total Overs Faced = " + totalOvers)
#------------------------------------------------------------------------------X---------------------------------------------X
# Plotting the Bar Chart: one stacked bar, dot balls at the bottom and the
# remaining deliveries on top.
forBarPlot = totalBalls - totalDots
# y-axis in bold
rc('font', weight='bold')
# Values of each group
bars1 = [totalDots]
bars2 = [forBarPlot]
# Candidate bar positions on the x-axis; the single bar is drawn at r[2].
r = [0, 1, 2, 3, 4, 5]
# Names of group and bar width
names = ['Dot Balls']
barWidth = 1
# Create orange bars
plt.bar(r[2], bars1, color='orange', edgecolor='white', width=barWidth)
# Create green bars (middle), on top of the first ones
plt.bar(r[2], bars2, bottom=bars1, color='green', edgecolor='white', width=barWidth)
# Custom X axis: the tick positions must match the labels one-to-one (passing
# all six positions with a single label raises ValueError on current
# matplotlib), so only the bar's own position gets a tick.
plt.xticks([r[2]], names, fontweight='bold')
# Keep the x-range that the six tick positions used to span, so the bar is
# not stretched across the whole axes.
plt.xlim(r[0] - barWidth / 2, r[-1] + barWidth / 2)
plt.xlabel("Graphical Represenation of Total Dot Balls")
# Show graphic
plt.show()
#-------------------------------------------------------------X---------------------------------------------------------------X
# Pie chart of how the deliveries faced split into dot balls and the rest.
# Dot balls are a subset of the total, so the first slice is the remainder
# (total - dots); plotting the total itself next to the dots would show the
# dot share as dots / (total + dots) instead of dots / total.
labels = 'Other Balls', 'Dot Balls'
sizes = [totalBalls - totalDots, totalDots]
colors = ['yellowgreen', 'yellow']
explode = [0.1, 0]  # Explode 1st slice
# Plotting the Pie Chart (drawn once; the second, identical plot was a duplicate)
plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', shadow=True, startangle=140)
plt.axis('equal')
plt.show()
# For Calculation we have totalBalls, totalOvers, totalDots
# f = open('html.txt', "a+")
# f.write(testVariable)
# f.close()
Many thanks to @metulburr for all his help on this problem set. The next phase of this project is to count the runs scored by each batsman, along with individual dot balls, and then chart the results as a table and export them to CSV.
As of right now, the initial problem of scrolling and getting total dot balls has been successfully resolved! Thanks :)