Jun-10-2023, 06:34 AM
Hi all, I am one day old in Python development. I am using AI which already got me much further than I have ever expected to get. I got the bot to work basically. It scrapes data and prints it to my telegram channel. But it seems like it just posts one instance and after that, I get errors I don't understand, and shuts down the loops.
I am looking for tips and a better understanding for the code/error
I am looking for tips and a better understanding for the code/error
import os import re import requests import schedule import time from bs4 import BeautifulSoup from dotenv import load_dotenv from urllib.parse import urljoin, urlsplit from telegram import Bot, InputFile import asyncio load_dotenv() # Keep track of scraped data and downloaded images scraped_data = set() downloaded_images = set() # Read desired symbols from a text file with open("desired_symbols.txt", "r") as file: desired_symbols = [symbol.strip() for symbol in file.readlines() if symbol.strip()] # Set up Telegram bot telegram_token = "[hidden]" telegram_channel_id = "[hidden]" bot = Bot(token=telegram_token) async def send_message_async(chat_id, text): await bot.send_message(chat_id=chat_id, text=text) def scrape_website(): global scraped_data cookies = { } headers = { 'authority': 'www.tradingview.com', } response = requests.get('https://www.tradingview.com/ideas/followed-authors/', cookies=cookies, headers=headers) soup = BeautifulSoup(response.content, "html.parser") # Find all the idea titles and store them idea_titles = soup.find_all("a", class_="tv-widget-idea__title") title_list = [title.text.strip() for title in idea_titles] # Find all the idea symbols and store them idea_symbols = soup.find_all("a", class_="tv-widget-idea__symbol") symbol_list = [symbol.text.strip() for symbol in idea_symbols] # Find all the thumbnail image URLs and store them thumbnail_images = soup.find_all("img", class_="tv-widget-idea__cover") image_url_list = [image["data-src"] for image in thumbnail_images] # Create a directory to save the images directory = "thumbnails" if not os.path.exists(directory): os.makedirs(directory) # Download the thumbnail images and send them to Telegram for i, (image_url, title, symbol) in enumerate(zip(image_url_list, title_list, symbol_list)): if image_url in downloaded_images: print(f"Skipping image {i}. Already downloaded.") print() continue response = requests.get(image_url) if response.status_code == 200: # Get the file extension from the image URL file_extension = os.path.splitext(urlsplit(image_url).path)[1] # Generate a unique filename for each image filename = f"thumbnail_{i}{file_extension}" file_path = os.path.join(directory, filename) with open(file_path, "wb") as file: file.write(response.content) print(f"Thumbnail image {i} downloaded successfully.") # Add the image URL to the set of downloaded images downloaded_images.add(image_url) else: print(f"Failed to download thumbnail image {i}.") print() # Check if the symbol matches any desired symbol pattern if any(re.match(pattern, symbol) for pattern in desired_symbols): # Generate a unique identifier for the data data_id = f"{title}_{symbol}_{i}" # Check if the data has already been sent if data_id not in scraped_data: # Send the data to Telegram message = f"Title: {title}\nSymbol: {symbol}\nImage: {image_url}" asyncio.run(send_message_async(chat_id=telegram_channel_id, text=message)) print(f"Data sent to Telegram: {message}") # Add the data ID to the set of scraped data scraped_data.add(data_id) print() # Schedule the scraping job schedule.every(1).minute.do(scrape_website) while True: schedule.run_pending() time.sleep(1)
Error:C:\Users\PC\PycharmProjects\scrape\venv\Scripts\python.exe C:\Users\PC\PycharmProjects\scrape\scrapingtradingview.py
Thumbnail image 0 downloaded successfully.
Thumbnail image 1 downloaded successfully.
Data sent to Telegram: Title: Bitcoin - Crash is ready! Bear Flag is confirmed (must see)
Symbol: BTCUSDT
Image: https://s3.tradingview.com/l/ltjOHdi6_mid.png
Thumbnail image 2 downloaded successfully.
Traceback (most recent call last):
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\_async\http11.py", line 88, in handle_async_request
await self._send_request_headers(**kwargs)
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\_async\http11.py", line 134, in _send_request_headers
await self._send_event(event, timeout=timeout)
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\_async\http11.py", line 152, in _send_event
await self._network_stream.write(bytes_to_send, timeout=timeout)
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\backends\asyncio.py", line 51, in write
await self._stream.send(item=buffer)
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\anyio\streams\tls.py", line 203, in send
await self._call_sslobject_method(self._ssl_object.write, item)
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\anyio\streams\tls.py", line 169, in _call_sslobject_method
await self.transport_stream.send(self._write_bio.read())
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\anyio\_backends\_asyncio.py", line 1238, in send
self._transport.write(item)
File "C:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\asyncio\proactor_events.py", line 365, in write
self._loop_writing(data=bytes(data))
File "C:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\asyncio\proactor_events.py", line 401, in _loop_writing
self._write_fut = self._loop._proactor.send(self._sock, data)
^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'send'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\telegram\request\_baserequest.py", line 277, in _request_wrapper
code, payload = await self.do_request(
^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\telegram\request\_httpxrequest.py", line 216, in do_request
res = await self._client.request(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpx\_client.py", line 1530, in request
return await self.send(request, auth=auth, follow_redirects=follow_redirects)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpx\_client.py", line 1617, in send
response = await self._send_handling_auth(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpx\_client.py", line 1645, in _send_handling_auth
response = await self._send_handling_redirects(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpx\_client.py", line 1682, in _send_handling_redirects
response = await self._send_single_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpx\_client.py", line 1719, in _send_single_request
response = await transport.handle_async_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpx\_transports\default.py", line 353, in handle_async_request
resp = await self._pool.handle_async_request(req)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\_async\connection_pool.py", line 261, in handle_async_request
raise exc
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\_async\connection_pool.py", line 245, in handle_async_request
response = await connection.handle_async_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\_async\connection.py", line 96, in handle_async_request
return await self._connection.handle_async_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\_async\http11.py", line 119, in handle_async_request
await self._response_closed()
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\_async\http11.py", line 232, in _response_closed
await self.aclose()
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\_async\http11.py", line 240, in aclose
await self._network_stream.aclose()
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\httpcore\backends\asyncio.py", line 54, in aclose
await self._stream.aclose()
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\anyio\streams\tls.py", line 193, in aclose
await self.transport_stream.aclose()
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\anyio\_backends\_asyncio.py", line 1261, in aclose
self._transport.close()
File "C:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\asyncio\proactor_events.py", line 109, in close
self._loop.call_soon(self._call_connection_lost, None)
File "C:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 761, in call_soon
self._check_closed()
File "C:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 519, in _check_closed
raise RuntimeError('Event loop is closed')
RuntimeError: Event loop is closed
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\PC\PycharmProjects\scrape\scrapingtradingview.py", line 129, in <module>
schedule.run_pending()
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\schedule\__init__.py", line 822, in run_pending
default_scheduler.run_pending()
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\schedule\__init__.py", line 100, in run_pending
self._run_job(job)
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\schedule\__init__.py", line 172, in _run_job
ret = job.run()
^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\schedule\__init__.py", line 693, in run
ret = self.job_func()
^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\scrapingtradingview.py", line 118, in scrape_website
asyncio.run(send_message_async(chat_id=telegram_channel_id, text=message))
File "C:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\asyncio\runners.py", line 190, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "C:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\asyncio\runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 653, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\scrapingtradingview.py", line 28, in send_message_async
await bot.send_message(chat_id=chat_id, text=text)
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\telegram\_bot.py", line 381, in decorator
result = await func(self, *args, **kwargs) # skipcq: PYL-E1102
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\telegram\_bot.py", line 807, in send_message
return await self._send_message(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\telegram\_bot.py", line 559, in _send_message
result = await self._post(
^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\telegram\_bot.py", line 469, in _post
return await self._do_post(
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\telegram\_bot.py", line 497, in _do_post
return await request.post(
^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\telegram\request\_baserequest.py", line 168, in post
result = await self._request_wrapper(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\PC\PycharmProjects\scrape\venv\Lib\site-packages\telegram\request\_baserequest.py", line 293, in _request_wrapper
raise NetworkError(f"Unknown error in HTTP implementation: {repr(exc)}") from exc
telegram.error.NetworkError: Unknown error in HTTP implementation: RuntimeError('Event loop is closed')
Process finished with exit code 1
This is what I got already, very happy with the progress: