Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Discord - Web scraping from a Forum
#1
Hello, I'm trying to get a script to work but I'm having some issues. I need a bot that scrapes the "Latest" post from a specific forum thread and then sends it to a Discord channel.

I keep getting a 401 error. I then changed the script and added login credentials; however, even after that it failed to scrape the posts and send them to the Discord channel. Any help would be much appreciated.

import discord
from discord.ext import commands
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlencode

# Discord bot token.
# NOTE(review): placeholder value. Load the real token from an environment
# variable or a secrets store instead of committing it to source control.
TOKEN = 'Yes I have it'

# Forum URL to scrape
FORUM_URL = 'https://www.sythe.org/threads/vouches-for-mrtolska-tolska1/'

# Discord channel ID where the bot will send the messages.
# Stored as a string; it is converted with int() before the channel lookup.
CHANNEL_ID = '1192589549428285510'

# Proxy API parameters.
# The key must not carry surrounding whitespace: a trailing space is sent to
# the proxy as part of the key, which is a likely cause of a 401 response.
# NOTE(review): this key has been published; rotate it and keep the new one
# out of the source file.
PROXY_API_KEY = '8710f234-abf0-492f-9ba6-b1b02d14f396'

# Gateway intents: start from the library's default set, then explicitly
# switch on the `messages` flag.
# NOTE(review): `messages` is presumably already part of the defaults in
# current discord.py releases; confirm before relying on this line.
intents = discord.Intents.default()
intents.messages = True  # Enable the intent to receive message events

# Bot instance used by the event handler and the final run() call;
# '!' is the prefix for text commands.
client = commands.Bot(command_prefix='!', intents=intents)

def _split_for_discord(text, limit=2000):
    """Split *text* into consecutive pieces of at most *limit* characters."""
    return [text[i:i + limit] for i in range(0, len(text), limit)]


@client.event
async def on_ready():
    """Scrape the forum once the bot is connected and post the results.

    Fetches the posts with ``fetch_latest_posts`` and sends each one to the
    channel identified by ``CHANNEL_ID``. Returns early (after printing a
    diagnostic) when the channel cannot be resolved.

    NOTE(review): discord.py documents that ``on_ready`` may fire more than
    once per process (e.g. after a reconnect), so the posts may be re-sent;
    add a guard if that matters.
    """
    # Local import so this block does not depend on a file-level import.
    import asyncio

    print('Logged in as', client.user.name)

    # fetch_latest_posts() uses blocking `requests` I/O with a long timeout;
    # run it in a worker thread so the event loop (and the gateway
    # heartbeat) keeps running while the page is downloaded.
    loop = asyncio.get_running_loop()
    latest_posts = await loop.run_in_executor(None, fetch_latest_posts)

    # get_channel() only consults the local cache and returns None on a
    # miss; fall back to an API lookup before giving up.
    channel_id = int(CHANNEL_ID)
    channel = client.get_channel(channel_id)
    if channel is None:
        try:
            channel = await client.fetch_channel(channel_id)
        except discord.DiscordException as exc:
            print('Could not resolve channel', channel_id, ':', exc)
            return

    # Send latest posts to the specified channel. Discord rejects empty
    # messages and messages above 2000 characters, so skip blanks and split
    # long posts into several messages.
    for post in latest_posts:
        if not post:
            continue
        for chunk in _split_for_discord(post):
            if chunk.strip():
                await channel.send(chunk)

def fetch_latest_posts():
    """Download the forum thread through the proxy and extract post texts.

    Returns:
        A list of whitespace-stripped text strings, one for every
        ``<div class="post">`` element that contains a
        ``<div class="content">`` child. An empty list is returned when the
        request fails, the proxy answers with a non-200 status, or nothing
        matches.
    """
    # Proxy parameters
    proxy_params = {
        # Strip the key: stray whitespace would be transmitted as part of
        # the key and can make the proxy reject the request.
        'api_key': PROXY_API_KEY.strip(),
        'url': FORUM_URL,
    }

    # Requesting through proxy. Only the network call is guarded, so parsing
    # mistakes are not silently reported as "no posts".
    try:
        response = requests.get(
            url='https://proxy.scrapeops.io/v1/',
            params=proxy_params,  # requests URL-encodes the mapping itself
            timeout=120,
        )
    except requests.RequestException as e:
        print("An error occurred:", e)
        return []

    if response.status_code != 200:
        print("Failed to fetch forum page:", response.status_code)
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extracting the latest posts. A post wrapper without a content element
    # is skipped instead of aborting the whole extraction.
    # NOTE(review): the 'post' / 'content' class names are assumptions about
    # the forum's markup; verify them against the actual page HTML.
    contents = (
        post.find('div', class_='content')
        for post in soup.find_all('div', class_='post')
    )
    latest_posts = [
        content.get_text().strip()
        for content in contents
        if content is not None
    ]

    if not latest_posts:
        print("No posts matched the expected markup; check the selectors.")
    return latest_posts

# Start the bot with the configured token. This runs at module top level,
# after the event handler and helper above have been defined.
client.run(TOKEN)
Reply


Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020