I need some help with this code. It always returns the same (first) page over and over again instead of moving on to the next page, and I can't figure out why. I suspect the problem is somewhere in the pagination loop.
import csv
import time

import requests
from bs4 import BeautifulSoup


def extract_text_from_html(value):
    """Return the plain text of *value* with any HTML tags stripped.

    The API embeds markup in some fields; non-string values are converted
    to ``str`` first so numeric fields pass through unchanged.
    """
    if value is None:
        return ''
    return BeautifulSoup(str(value), 'html.parser').get_text(strip=True)


def fetch_data(url):
    """GET *url* and return the decoded JSON body, or ``None`` on any error."""
    try:
        # NOTE(review): verify=False disables SSL certificate checking —
        # acceptable only for a one-off scrape; drop it if the cert is valid.
        response = requests.get(url, verify=False, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as err:
        print(f"Other error occurred: {err}")
    return None


def process_all_pages(base_url, per_page=25):
    """Fetch every page of *base_url* and return the combined ``data`` lists.

    Bug fix: ``page += 1`` now executes at the end of every successful
    iteration.  In the original layout it sat after branches that always
    ``break``-ed (or was mis-indented), so the loop requested page 1 forever.
    As a second guard, the loop also stops if the server ignores the ``page``
    query parameter and returns identical content twice in a row.
    """
    all_data = []
    page = 1
    previous_items = None  # last page's item list, for the duplicate guard

    while True:
        url = f"{base_url}?page={page}&per_page={per_page}"
        print(f"Fetching data from: {url}")  # Debug statement
        data = fetch_data(url)

        if not data:
            print(f"No data received for page {page}. Exiting loop.")
            break

        if 'data' not in data or not isinstance(data['data'], list):
            print(f"Page {page}: No valid data or end of data reached")
            break

        items = data['data']
        num_items = len(items)
        print(f"Page {page}: Received {num_items} items")

        # Guard: if this page is byte-identical to the previous one the
        # server is ignoring pagination — stop instead of looping forever.
        if items == previous_items:
            print(f"Page {page}: identical to previous page — server is "
                  f"ignoring the page parameter. Stopping.")
            break
        previous_items = items

        all_data.extend(items)

        # Fewer than a full page means this was the last page.
        if num_items < per_page:
            print(f"Page {page}: Last page reached with {num_items} items.")
            break

        page += 1        # advance to the next page (the original never got here)
        time.sleep(1)    # be polite to the rate limit

    return all_data


def main():
    """Fetch all pages from the API and write the cleaned rows to output.csv."""
    json_url = "https://samviewer.digile.be/nl/sam/ampps.json"
    all_data = process_all_pages(json_url)

    if not all_data:
        print("No data retrieved from the API.")
        return

    with open('output.csv', mode='w', newline='', encoding='utf-8') as file:
        csv_writer = csv.writer(file)
        headers = ['CTI-extended', 'Naam', 'CNK publiek', 'Prijs publiek',
                   'Prijs af-fabriek', 'Vergunninghouder (verdeler)']
        csv_writer.writerow(headers)

        for item in all_data:
            # Extract each field and strip any embedded HTML markup.
            row = [
                extract_text_from_html(item.get('cti_ext', '')),
                extract_text_from_html(item.get('name', '')),
                extract_text_from_html(item.get('cnk_p', '')),
                extract_text_from_html(item.get('price_pub', '')),
                extract_text_from_html(item.get('price_exf', '')),
                extract_text_from_html(item.get('company', '')),
            ]
            # Write the row only if at least one field has data.
            if any(row):
                csv_writer.writerow(row)

    print(f"Data successfully written to output.csv. Total rows: {len(all_data)}")


if __name__ == "__main__":
    main()
Larz60+ write Jul-23-2024, 08:54 AM:
Please post all code, output and errors (in their entirety) between their respective tags. Refer to the BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Tags have been added for you this time. Please use BBCode tags on future posts.
Please post all code, output and errors (in their entirety) between their respective tags. Refer to the BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Tags have been added for you this time. Please use BBCode tags on future posts.