Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Error 403 Scraping website
#11
from bs4 import BeautifulSoup
import requests
import re
import mysql.connector
from mysql.connector import errorcode

# Scrape the Forebet live-tips page and store one row per match in the
# `forebetlive` table of the local `my_cataviz` MySQL database.
mydb = mysql.connector.connect(
  host="localhost",
  user="root",
  database="my_cataviz"
)

mycursor = mydb.cursor()

# Browser-like request headers sent along with the page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'en-US,en;q=0.9',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}

# One parameterized statement reused for every row (five columns, five
# placeholders). With IGNORE, MySQL downgrades duplicate-key errors to
# warnings, so such rows are skipped rather than aborting the script.
sql = "INSERT IGNORE forebetlive (hometeam,awayTeam,pscore,avgscore,probability) VALUES (%s, %s, %s, %s, %s)"

inserted = 0
try:
    #response = requests.get('https://www.forebet.com/en/football-predictions', headers=headers)
    response = requests.get('https://www.forebet.com/en/live-football-tips', headers=headers)
    # Stop on an HTTP error status (e.g. 403) instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')
    rows = soup.find_all('div', {'class': 'rcnt'})

    for r in rows:
        try:
            hometeam = r.find('span', class_='homeTeam').text
            awayTeam = r.find('span', class_='awayTeam').text
            pscore = r.find('span', class_='forepr').text
            #correctscore = r.find('div', class_='ex_sc tabonly').text
            avgscore = r.find('div', class_='avg_sc tabonly').text
            probability = r.find('span', class_='fpr').text
            # Digit runs found in the '.fprc' markup; the original unpacked
            # them into p1, x, p2 (presumably the 1/X/2 percentages - confirm).
            numbers = re.findall(r'\d+', r.select_one('.fprc').decode())
        except AttributeError:
            # find()/select_one() returned None: this row lacks one of the
            # expected elements, so it is not a complete prediction row.
            continue
        if len(numbers) != 3:
            # Previously this case raised an uncaught ValueError and aborted
            # the run before commit(); now the incomplete row is skipped.
            continue

        val = (hometeam, awayTeam, pscore, avgscore, probability)
        mycursor.execute(sql, val)
        # rowcount describes only the most recent execute(), so add it up
        # per row to report the real number of inserted records.
        inserted += mycursor.rowcount

    mydb.commit()
finally:
    # Release the cursor and the connection even if scraping or an insert fails.
    mycursor.close()
    mydb.close()

print(inserted, "records inserted.")
    
I am trying to save the data to a database, and everything works, but I am not able to add this:
rows = soup.select_one('.contentmiddle')
for r in rows.select('.l_scr'):
    print(r.text)
to add the result in
sql = "INSERT IGNORE forebetlive (hometeam,awayTeam,pscore,avgscore,probability,lscr) VALUES (%s, %s, %s, %s, %s, %s)"
Reply
#12
Can do something like this.
from bs4 import BeautifulSoup
import requests
import re
from pprint import pprint

# Request headers imitating a desktop Chrome browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'en-US,en;q=0.9',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}

# Download the live-tips page and pick out every 'rcnt' match container.
response = requests.get('https://www.forebet.com/en/live-football-tips', headers=headers)
soup = BeautifulSoup(response.content, 'lxml')
rows = soup.find_all('div', {'class': 'rcnt'})

# lst_1: one 5-field tuple per match row that has all expected elements.
lst_1 = []
for match in rows:
    hometeam = match.find('span', class_='homeTeam').text
    awayTeam = match.find('span', class_='awayTeam').text
    pscore = match.find('span', class_='forepr').text
    try:
        avgscore = match.find('div', class_='avg_sc tabonly').text
        probability = match.find('span', class_='fpr').text
        fprc_markup = match.select_one('.fprc').decode()
        # Exactly three digit runs are expected in the '.fprc' markup.
        p1, x, p2 = re.findall(r'\d+', fprc_markup)
    except AttributeError:
        # A lookup returned None, so the row is incomplete: leave it out.
        continue
    lst_1.append((hometeam, awayTeam, pscore, avgscore, probability))

# lst_2: text of every '.l_scr' element inside the '.contentmiddle' container.
rows = soup.select_one('.contentmiddle')
lst_2 = [cell.text for cell in rows.select('.l_scr')]

# Pair each match tuple with the score at the same position and extend the
# tuple with it, giving 6-field tuples.
data = zip(lst_1, lst_2)
data_one = tuple((*inner, score) for inner, score in data)
pprint(data_one)
Output:
(('Al Oruba (UAE)', 'Dubba Al Fujairah', 'X', '3.13', '36', '0 - 0'), ('Hellas Verona', 'SSC Napoli', '2', '2.33', '99', '1 - 3'), ('Gaziantep B.B.', 'Antalyaspor', '1', '2.90', '42', '1 - 0'), ('Konyaspor', 'Pendikspor', 'X', '1.38', '47', '1 - 1'), ('Degerfors IF', 'Kalmar FF', '2', '2.92', '99', '1 - 3'), ('FK Bodo/Glimt', 'Sandefjord', '1', '3.92', '99', '4 - 3'), ('SM Caen', 'AJ Auxerre', '2', '2.22', '50', '1 - 1'), ('Puszcza', 'Cracovia', '1', '1.50', '44', '1 - 0'), ('Marek', 'Svoge', '1', '2.88', '98', '2 - 0'), .....
cartonics likes this post
Reply
#13
from bs4 import BeautifulSoup
import requests
import re
from pprint import pprint
import mysql.connector
from mysql.connector import errorcode

# Connect to the local MySQL database 'my_cataviz' as user 'root'.
mydb = mysql.connector.connect(
  host="localhost",
  user="root",
  database="my_cataviz"
)

mycursor = mydb.cursor()

# Browser-like request headers sent with the page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'en-US,en;q=0.9',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}
 
# Fetch the live-tips page and select every 'rcnt' match container.
response = requests.get('https://www.forebet.com/en/live-football-tips', headers=headers)
soup = BeautifulSoup(response.content, 'lxml')
rows = soup.find_all('div', {'class': 'rcnt'})

# Insert statement for ONE row: six columns, six placeholders.
sql = "INSERT IGNORE forebetlive (hometeam,awayTeam,pscore,avgscore,probability,result) VALUES (%s, %s, %s, %s, %s, %s)"
   
# lst_1 collects one 5-field tuple per match row.
lst_1 = []
for r in rows:
    hometeam = r.find('span',class_= 'homeTeam').text
    awayTeam = r.find('span',class_= 'awayTeam').text
    pscore = r.find('span', class_='forepr').text
    try:
        #correctscore = r.find('div', class_='ex_sc tabonly').text
        avgscore = r.find('div', class_='avg_sc tabonly').text
        probability = r.find('span', class_='fpr').text
        fprc = r.select_one('.fprc')
        # Digit runs from the '.fprc' markup, unpacked into exactly three
        # names; p1, x and p2 are not used afterwards.
        pro = re.findall(r'\d+', fprc.decode())
        p1, x, p2 = pro
        val = (hometeam, awayTeam, pscore, avgscore, probability)
        #print(val)
        lst_1.append(val)
    except AttributeError:
        # A lookup above returned None; the row is skipped.
        pass
 
# lst_2 collects the text of every '.l_scr' element under '.contentmiddle'.
lst_2 = []
rows = soup.select_one('.contentmiddle')
for r in rows.select('.l_scr'):
    #print(r.text)
    lst_2.append(r.text)
 
# Pair entries by position and append the score to each match tuple.
# NOTE(review): rows skipped in the loop above are not skipped in lst_2, so
# the positional pairing may shift after a skipped row - verify.
data = zip(lst_1, lst_2)
data_one = tuple(inner + (score,) for inner, score in data)
pprint(data_one)
# NOTE: data_one is a tuple of row tuples, but `sql` has placeholders for a
# single row. This execute() call is the one that fails with the
# ProgrammingError quoted after this script; the reply that follows
# recommends cursor.executemany for multi-row data.
mycursor.execute(sql, data_one)
 
mydb.commit()

print(mycursor.rowcount, "records inserted.")
Error:
mysql.connector.errors.ProgrammingError: Failed processing format-parameters; Python 'tuple' cannot be converted to a MySQL type
while if i use:
mycursor.execute(sql, tuple(inner + (score,) for inner, score in data))
Error:
mysql.connector.errors.ProgrammingError: 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '%s, %s, %s, %s, %s, %s)' at line 1
Reply
#14
You must use cursor.executemany; here is a working example.
import mysql.connector

# Connection settings; the 'xxxxxx' values are placeholders to be replaced.
db_config = {
    'host': "localhost",
    'user': 'root',
    'passwd': 'xxxxxx',
    'database': 'xxxxxx'
}

# SQL query for inserting data: one placeholder per column of a single row.
insert_query = '''\
INSERT INTO matches (team1, team2, outcome, odds, popularity, score)
VALUES (%s, %s, %s, %s, %s, %s)'''

# Sample rows, each a 6-field tuple matching the column order of insert_query.
data_one = (
 ('Santa Lucia', 'Gudja United', 'X', '2.40', '54', '0 - 0'),
 ('Stade Marocain', 'OC Khourigba', 'X', '1.88', '51', '0 - 0'),
 ('Bischofshofen', 'Rheindorf Altach II', '1', '2.20', '48', '1 - 0'),
 ('Hamrun Spartans', 'Sliema Wanderers', '1', '2.57', '48', '1 - 1'),
 ('Sirius IK', 'Brommapojkarna', '1', '3.65', '53', '2 - 2'),
 ('Slask Wroclaw', 'Legia Warszawa', '1', '2.30', '70', '3 - 0'),
 ('GKS Tychy', 'Podbeskidzie', '1', '2.67', '99', '3 - 0'),
 ('Dinamo Minsk', 'FK Minsk', '1', '2.43', '99', '1 - 0'),
 ('FK Gjøvik-Lyn', 'Alta IF', '2', '4.18', '55', '1 - 2'),
 ('Mouna', 'ASEC Mimosas', 'X', '0.50', '40', '2 - 1'),
 ('Atlético Malveira', 'AD Marco 09', 'X', '2.27', '38', '1 - 1'),
 ('Rail Club Kadiogo', 'ASF Bobo', '1', '1.94', '55', '1 - 0'),
 ('Platges de Calvià', 'Alaior', '1', '0.88', '49', '1 - 0'),
 ('Balsicas Atlético', 'CD Plus Ultra', '1', '1.50', '68', '3 - 1'),
 ('Renaissance Mons 44 W', 'Genk II W', '2', '3.63', '47', '2 - 3'),
 ('SD Deusto', 'SCD Durango', '1', '2.46', '46', '2 - 0'),
 ('Torino FC', 'Inter Milano', '2', '2.06', '51', '0 - 0'),
 ('Galatasaray SK', 'Besiktas JK', '1', '3.21', '48', '1 - 0'),
 ('Baltika Kaliningrad', 'FK Krasnodar', 'X', '1.88', '53', '2 - 1'),
 ('SK Brann', 'Molde FK', '2', '2.97', '41', '0 - 1'),
 )

conn = mysql.connector.connect(**db_config)
try:
    cursor = conn.cursor()
    try:
        # Insert data into the database: executemany runs the statement once
        # per row tuple, which is what a sequence of rows requires.
        cursor.executemany(insert_query, data_one)
        conn.commit()
    finally:
        # Close the cursor even when the insert or the commit fails.
        cursor.close()
finally:
    # Always release the connection, on success and on error alike.
    conn.close()
The Create Table command used:
-- Table used by the executemany example: apart from the auto-generated id,
-- its columns are the six listed in the INSERT INTO matches (...) statement.
CREATE TABLE matches (
    -- Surrogate key generated by MySQL for every inserted row.
    id INT AUTO_INCREMENT PRIMARY KEY,
    team1 VARCHAR(50) NOT NULL,
    team2 VARCHAR(50) NOT NULL,
    outcome VARCHAR(20) NOT NULL,
    -- Fixed-point number: up to 5 digits in total, 2 after the decimal point.
    odds DECIMAL(5,2) NOT NULL,
    popularity INT NOT NULL,
    -- Stored as text; the sample rows use values such as '1 - 0'.
    score VARCHAR(10) NOT NULL
);
Reply
#15
# Attempt to add the match minute as a seventh field (this version fails).
lst_2 = []
rows = soup.select_one('.contentmiddle')
# First pass: append the text of every '.l_scr' element.
for r in rows.select('.l_scr'):
    #print(r.text)
    lst_2.append(r.text)
# Second pass: the '.l_min' texts are appended to the SAME list, after the
# '.l_scr' texts, rather than being kept in a list of their own.
for r in rows.select('.l_min'):
    print(r.text)
    lst_2.append(r.text)
 
# zip() over two iterables yields 2-item pairs (inner, item from lst_2) ...
data = zip(lst_1, lst_2)
# ... so unpacking each pair into three names raises
# "ValueError: not enough values to unpack (expected 3, got 2)".
data_one = tuple(inner + (score,minute, ) for inner,score,minute in data)
pprint(data_one)
I tried to add the minutes. If I print r.text I can see the minutes.

Error:
File "\forebet1.py", line 62, in <genexpr> data_one = tuple(inner + (score,minute, ) for inner,score,minute in data) ValueError: not enough values to unpack (expected 3, got 2)
Reply
#16
maybe solved!

Thanks

# Gather the '.l_min' texts in their own list, echoing each one as it is read.
rows = soup.select_one('.contentmiddle')
lst_3 = []
for cell in rows.select('.l_min'):
    minute_text = cell.text
    print(minute_text)
    lst_3.append(minute_text)

# Zip the three lists by position and extend every match tuple with its
# score and minute.
data = zip(lst_1, lst_2, lst_3)
data_one = tuple((*inner, score, minute) for inner, score, minute in data)
Reply
#17
Can something be done to "click" on the "more" button
https://www.forebet.com/en/football-tips...for-today/
to see all the data and not only the first rows?
Reply
#18
(Oct-25-2023, 09:29 AM)cartonics Wrote: can be done somethig to "click" on more button
Yes, but then you need something like Selenium.
There can be other ways (e.g. catching the JSON response), but that will be harder to do if you are new to this.

Here is a working demo; you first need to click on the agree button, then on the more button.
The sleep calls are there so you can see what happens step by step rather than all at once, and the demo shows how to reuse the old BeautifulSoup code by using browser.page_source.
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.common.exceptions import ElementNotInteractableException
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time

# Setup
# https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/118.0.5993.54/win64/chromedriver-win64.zip
options = Options()
#options.add_argument("--headless=new")
ser = Service(r"C:\cmder\bin\chromedriver.exe")
browser = webdriver.Chrome(service=ser, options=options)
try:
    # Parse or automation
    url = 'https://www.forebet.com/en/football-tips-and-predictions-for-today/'
    browser.get(url)
    time.sleep(3)
    # The consent dialog has to be accepted before the page can be used.
    agree = browser.find_element(By.CSS_SELECTOR, 'button.fc-button.fc-cta-consent.fc-primary-button')
    agree.click()
    time.sleep(5)
    try:
        more_bt = browser.find_element(By.CSS_SELECTOR, '#mrows > span')
        more_bt.click()
    except (NoSuchElementException, ElementNotInteractableException):
        # If there is no usable <more> button on site, carry on with the rows
        # that are already loaded. Only these Selenium errors are ignored, so
        # Ctrl+C and genuine programming errors are no longer swallowed.
        pass

    # Give the page time to load the extra rows before taking the HTML.
    time.sleep(5)
    html = browser.page_source
finally:
    # Always shut down the browser and the chromedriver process.
    browser.quit()

# Old code: the rendered HTML is parsed with BeautifulSoup exactly as before.
soup = BeautifulSoup(html, 'lxml')
rows = soup.find_all('div', {'class': 'rcnt'})
for r in rows:
    hometeam = r.find('span',class_= 'homeTeam').text
    print(hometeam)
cartonics likes this post
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Scraping Data from Website melkaray 3 803 Sep-22-2023, 12:41 PM
Last Post: melkaray

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020