Posts: 3
Threads: 1
Joined: Mar 2018
Mar-24-2018, 07:30 PM
(This post was last modified: Mar-24-2018, 07:30 PM by nutgut.)
Hi,
I have build a small program with Python3. I'm using urllib.request.urlopen() to download csv files from an open API.
I save the files with file.write(url). After the download the content is imported to a db (sqlite).
It works good but I wonder if this is a Pythonic way to do it? Is it possible to save the content in the csv file to a list instead of a file?
The files are maybe around 1 MB.
Should I keep it as it is or do you guys have a better idea?
I just want to learn the right way.
Posts: 12,030
Threads: 485
Joined: Sep 2016
I wrote this a while back and it works for me:
GetUrl.py
import requests
import socket
class GetUrl:
    """Small convenience wrapper around ``requests.get`` that prints a
    human-readable message for each failure class instead of raising."""

    def __init__(self):
        # HTTP status code of a successful response; callers compare
        # against this after fetch_url().
        self.ok_status = 200
        # Last response returned by fetch_url(); None until a fetch runs.
        self.response = None

    def check_availability(self):
        """Return True if the network looks usable, else False.

        NOTE(review): resolving our own hostname to something other than
        127.0.0.1 only shows local name resolution works — it does not
        prove the internet is reachable. Confirm this heuristic is good
        enough for the intended use.
        """
        self.internet_available = (
            socket.gethostbyname(socket.gethostname()) != '127.0.0.1'
        )
        return self.internet_available

    def fetch_url(self, url, bin=False):
        """Fetch *url* and return the ``requests.Response``, or None on error.

        ``bin`` (name kept for backward compatibility, although it shadows
        the builtin) requests a streamed response for binary downloads.
        Errors are reported on stdout rather than raised.
        """
        self.response = None
        if self.check_availability():
            try:
                # The original duplicated this call in two branches that
                # differed only in stream=True; one call with stream=bin
                # is equivalent.
                self.response = requests.get(
                    url,
                    stream=bin,
                    allow_redirects=False,
                    timeout=3,
                )
                self.response.raise_for_status()
            except requests.exceptions.HTTPError as errh:
                print("Http Error:", errh)
            except requests.exceptions.ConnectionError as errc:
                print("Error Connecting:", errc)
            except requests.exceptions.Timeout as errt:
                print("Timeout Error:", errt)
            except requests.exceptions.RequestException as err:
                print("OOps: Something Else", err)
        else:
            print("Please check network connection and try again")
        return self.response
def testit():
    """Smoke test: fetch Google's front page and print it, or report failure."""
    getter = GetUrl()
    page = getter.fetch_url('https://www.google.com/')
    if page is None:
        # fetch_url already printed the reason; nothing more to do.
        return
    if page.status_code == getter.ok_status:
        print(page.text)
    else:
        print("Problem downloading page")
if __name__ == '__main__':
    testit()

So to use in another program:
Posts: 3
Threads: 1
Joined: Mar 2018
Thanks Larz60+,
Well I guess for fetching a web page it's good. My URL starts a download (CSV file), so I can't fetch it the way you do.
So maybe I'm doing it the right way.
Would be fun to hear what others think about the solution.
(Mar-24-2018, 08:53 PM)Larz60+ Wrote: I wrote this a while back and it works for me:
GetUrl.py
import requests
import socket
class GetUrl:
def __init__(self):
self.ok_status = 200
self.response = None
def check_availability(self):
self.internet_available = False
if socket.gethostbyname(socket.gethostname()) != '127.0.0.1':
self.internet_available = True
return self.internet_available
def fetch_url(self, url, bin=False):
self.response = None
if self.check_availability():
try:
if bin:
self.response = requests.get(url, stream=True, allow_redirects=False, timeout=3)
self.response.raise_for_status()
else:
self.response = requests.get(url, allow_redirects=False, timeout=3)
self.response.raise_for_status()
except requests.exceptions.HTTPError as errh:
print("Http Error:", errh)
except requests.exceptions.ConnectionError as errc:
print("Error Connecting:", errc)
except requests.exceptions.Timeout as errt:
print("Timeout Error:", errt)
except requests.exceptions.RequestException as err:
print("OOps: Something Else", err)
else:
print("Please check network connection and try again")
return self.response
def testit():
gu = GetUrl()
url = 'https://www.google.com/'
page = gu.fetch_url(url)
if page is not None:
if page.status_code == gu.ok_status:
print(page.text)
else:
print("Problem downloading page")
if __name__ == '__main__':
testit() So to use in another program:
Posts: 7,318
Threads: 123
Joined: Sep 2016
Mar-26-2018, 05:15 PM
(This post was last modified: Mar-26-2018, 05:15 PM by snippsat.)
(Mar-26-2018, 11:39 AM)nutgut Wrote: Would be fun to hear what others think about the solution. The solution is okay, but he does a lot of error checking, which can be confusing.
In its simplest form, here is how to download a CSV file from the web.
Always use Requests and not urllib.
import requests

# Direct link to the CSV file to download.
url = 'http://www.sample-videos.com/csv/Sample-Spreadsheet-10-rows.csv'
# timeout prevents hanging forever on a dead server;
# raise_for_status() fails loudly on HTTP errors instead of writing an
# error page into the CSV file.
url_get = requests.get(url, timeout=10)
url_get.raise_for_status()
# Download csv
with open('sample.csv', 'wb') as f_out:
    f_out.write(url_get.content)

Example: parse that link out of the website, and then use it.
import requests
from bs4 import BeautifulSoup

# Page that links to the sample CSV file.
url_csv = 'http://www.sample-videos.com/download-sample-csv.php'
url = requests.get(url_csv, timeout=10)
url.raise_for_status()  # fail loudly if the page could not be fetched
# NOTE: 'lxml' is a third-party parser; the stdlib 'html.parser' would
# also work here if lxml is not installed.
soup = BeautifulSoup(url.content, 'lxml')
h1 = soup.find('h1')
print(h1.text)
print('------------')
# The site base URL and the relative CSV link live in two separate anchors.
site = soup.find('a', class_="navbar-brand")
link = soup.find('a', class_="download_csv")
adress_csv = f"{site.get('href')}/{link.get('href')}"
print(adress_csv)
# Download csv
download_link = requests.get(adress_csv, timeout=10)
download_link.raise_for_status()
# Use the last path segment of the URL as the local file name.
csv_url_name = adress_csv.split('/')[-1]
print(csv_url_name)
with open(csv_url_name, 'wb') as f_out:
    f_out.write(download_link.content)

Output: Download Sample CSV
------------
http://www.sample-videos.com/csv/Sample-Spreadsheet-10-rows.csv
Sample-Spreadsheet-10-rows.csv
Posts: 3
Threads: 1
Joined: Mar 2018
Thanks,
I will check up the requests.
(Mar-26-2018, 05:15 PM)snippsat Wrote: (Mar-26-2018, 11:39 AM)nutgut Wrote: Would be fun to hear what others think about the solution. The solution is okay, but he does a lot of error checking, which can be confusing.
In its simplest form, here is how to download a CSV file from the web.
Always use Requests and not urllib.
import requests
url = 'http://www.sample-videos.com/csv/Sample-Spreadsheet-10-rows.csv'
url_get = requests.get(url)
# Download csv
with open('sample.csv', 'wb') as f_out:
f_out.write(url_get.content) Example parse out that link from the website,and the use it.
import requests
from bs4 import BeautifulSoup
url_csv = 'http://www.sample-videos.com/download-sample-csv.php'
url = requests.get(url_csv)
soup = BeautifulSoup(url.content, 'lxml')
h1 = soup.find('h1')
print(h1.text)
print('------------')
site = soup.find('a', class_="navbar-brand")
link = soup.find('a', class_="download_csv")
adress_csv = f"{site.get('href')}/{link.get('href')}"
print(adress_csv)
# Download csv
download_link = requests.get(adress_csv)
csv_url_name = adress_csv.split('/')[-1]
print(csv_url_name)
with open(csv_url_name, 'wb') as f_out:
f_out.write(download_link.content) Output: Download Sample CSV
------------
http://www.sample-videos.com/csv/Sample-Spreadsheet-10-rows.csv
Sample-Spreadsheet-10-rows.csv
|