Python Forum

Full Version: Please help - Code was working!
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Please find attached a python script. It was script was working last week but it does not work any longer.

The script pulls out data from an internet news calendar (forexfactory.com/calendar.php) -> outputs a CSV file from the internet calendar. I have very limited python skills and urgently need to get this script working. I have been playing around with the code and completely lost. Sad Please help. Script link: https://drive.google.com/file/d/0B9KF5ld...lOZlU/view

I get the following error message:

Python News Downloader Version 1.41
Downloading Current Week's Calendar from Forex Factory...
Error:
Traceback (most recent call last): File "PythonNewsDownlaoder.py", line 221, in <module> Table_thisweek = Process_Week(url, Year) File "PythonNewsDownlaoder.py", line 144, in Process_Week in_time = datetime.strptime(String , "%Y %a%b %d") File "C:\Program Files (x86)\Python35-32\lib\_strptime.py", line 510, in _strptime_datetime tt, fraction = _strptime(data_string, format) File "C:\Program Files (x86)\Python35-32\lib\_strptime.py", line 346, in _strptime data_string[found.end():]) ValueError: unconverted data remains:
#Version 1.41
# - Fixed small error with leap year
#Version 1.4
# - Fixed bug with wrong year in first week of next year
# - fixed error when all arguments are not given
# - added option for high impact news only
#Version 1.3
# - Added the option for future Weeks
#Version 1.2
# - Fixed bug when no news data on a particular day
#Version 1.1
# - Updated to include previous years
#Version 1.0
from bs4 import BeautifulSoup
import csv
import urllib
import os,sys, getopt
import re
import time
if sys.version_info[0] == 3:
from urllib.request import urlopen
else:
from urllib import urlopen
#set encoding to utf-8

from datetime import date, datetime, timedelta
HighImpactOnly = False
print("Python News Downloader Version 1.41 \n")
#argument handling for number of previous weeks to include and/or file name
try:
opts, args = getopt.getopt(sys.argv[1:], "hIf:w:o:", ["weeks=","future=","output="]) #colon ":" are those that require arguments
if not opts:
print("Using default parameters... \n")
opts = [('-w', '2'), ('-f', '1'), ('-o', 'news')]
arg =
except getopt.GetoptError as err:
# print help information and exit:
print(str(err)) # will print something like "option -x not recognized"
print("Usage:")
print("-w <number> : the number of weeks you would like to go back from current week")
print("-f <number> : the number of weeks you would like to go forward from current week")
print("-o <outputfile> : output the results to the filename you specify")
print("-I : will give you high impact news only")
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print("Usage:")
print("-w <number> : the number of weeks you would like to go back from current week")
print("-f <number> : the number of weeks you would like to go forward from current week")
print("-o <outputfile> : output the results to the filename you specify")
print("-I : will give you high impact news only")
sys.exit()
elif opt in ("-w", "--weeks"):
limitweeks = int(arg)
elif opt in ("-f", "--future"):
futureweeks = int(arg)
elif opt in ("-I"):
HighImpactOnly = True
elif opt in ("-o", "--output"):
if arg.find('.')!=-1:
filename, file_extension = os.path.splitext(arg)
Archive = filename + "_archive" + file_extension
CurrentWeek = arg
else:
Archive = arg + "_archive.csv"
CurrentWeek = arg + '.csv'
else:
print("Usage:")
print("-w <number> : the number of weeks you would like to go back from current week")
print("-f <number> : the number of weeks you would like to go forward from current week")
print("-o <outputfile> : output the results to the filename you specify")
print("-I : will give you high impact news only")
sys.exit()
try:
limitweeks
except NameError:
print("Using default previous week parameters of 2")
limitweeks=2

try:
futureweeks
except NameError:
print("Using default future week parameters of 1")
futureweeks=1

try:
Archive
except NameError:
print("Using default output files")
CurrentWeek = "news.csv"
Archive = "news_archive.csv"

def remove_non_ascii(text):
return re.sub(r'[^\x00-\x7f]',r'',text)

#function to check if time is in the right format
def isTimeFormat(input):
try:
datetime.strptime(input, "%I:%M%p")
return True
except ValueError:
return False

#function to output the calendar in a list given the specific forex factory url
def Process_Week(url,Get_Year):
# open page are search for relevent imformation
page = urlopen(url)
soup = BeautifulSoup(page,"html.parser")
Dates = soup.findAll("td", class_="calendar__cell calendar__date date")
Times = soup.findAll("td", class_="calendar__cell calendar__time time")
Currency = soup.findAll("td", class_="calendar__cell calendar__currency currency")
Impact = soup.findAll("span", {"class" : ["low", "medium", "high", "holiday"]})
Impact = Impact[4:]
Title = soup.findAll("span", class_="calendar__event-title")
Actual = soup.findAll("td", class_="calendar__cell calendar__actual actual")
Forecast = soup.findAll("td", class_="calendar__cell calendar__forecast forecast")
Previous = soup.findAll("td", class_="calendar__cell calendar__previous previous")
Event_id = soup.findAll("tr", attrs={"data-eventid":True})

Dates_1=
Times_1=
Currency_1=
Impact_1=
Impact_2=
Title_1=
Actual_1=
Forecast_1=
Previous_1 =
Revised_1 =
Event_id_1 =
index_number=0
#filling in blanks with previous value
for index, row in enumerate(Dates):
if (row.text == ''):
Dates_1.append(Dates[index_number].text)
else:
index_number = index
Dates_1.append(row.text)
#Organize Date array to specific format
for index, item in enumerate(Dates_1):
Dates_1[index] = item[3:]
This_Year = Get_Year
String = str(This_Year) + " " + item
in_time = datetime.strptime(String , "%Y %a%b %d")
#if month is Jan and url is Dec last year, adjust year to next year
if ((in_time.month == 1) & (url[-10:-7] == "Dec")):
This_Year = int(Get_Year) + 1
String = str(This_Year) + " " + item
in_time = datetime.strptime(String , "%Y %a%b %d")
out_time = datetime.strftime(in_time, "%Y/%m/%d")
Dates_1[index] = '"%s"'%out_time

#Extracting the relevent data points from the HTML
#Note: "\xc2\xa0" represents that silly current time arrow
index_number=0
for index, row in enumerate(Times):
if row.text == '':
Caltime = remove_non_ascii(Times[index_number].text)
if (isTimeFormat(Caltime) == False):
Caltime = "12:01am"
else:
if (isTimeFormat(remove_non_ascii(row.text)) == False):
Caltime = "12:01am"
else:
index_number = index
Caltime = remove_non_ascii(row.text)
in_time = datetime.strptime(Caltime, "%I:%M%p")
in_time = in_time + timedelta(hours=0) #time offset if required
out_time = datetime.strftime(in_time, "%H:%M") # change to 24H
Times_1.append('"%s"'%out_time)

for row in Currency:
Currency_1.append('"%s"'%row.text)
for row in Impact:
CalImpact=row['class'][0]
if CalImpact == 'medium':
Impact_1.append('"M"')
elif CalImpact == 'high':
Impact_1.append('"H"')
elif CalImpact == 'low':
Impact_1.append('"L"')
else:
Impact_1.append('"N"')

index_number=0
for index, row in enumerate(Title):
Title_1.append('"%s"'%row.text)
if (Title_1[index] == '""'):
Impact_2.append('"N"')
else:
Impact_2.append(Impact_1[index_number])
index_number = index_number+1

for row in Actual:
Actual_1.append('"%s"'%row.text)
for row in Forecast:
Forecast_1.append('"%s"'%row.text)
for row in Previous:
Previous_1.append('"%s"'%row.text)
Revised = row.find(class_="revised")
if Revised != None:
Revised_1.append('"%s"'%Revised["title"])
else:
s=""
Revised_1.append('"%s"'%s)
for row in Event_id:
Event_id_1.append('"%s"'%row['data-eventid'])

#organize data into table array then transpose so it can be written to csv per row
Table = [Dates_1, Times_1, Currency_1, Impact_2, Title_1, Actual_1, Forecast_1, Previous_1, Revised_1, Event_id_1]
Table = list(zip(*Table))
return Table




#download calendar for this week and last week
print("Downloading Current Week's Calendar from Forex Factory...")
url = 'http://www.forexfactory.com/calendar.php'
Year = date.today().year
Table_thisweek = Process_Week(url, Year)
Last_date = Table_thisweek[0][0].replace("\"", "")
Last_time = Table_thisweek[0][1].replace("\"", "")
time_combines = Last_date + ' ' + Last_time

Weekbefore =
Weekafter =
Weekbefore_time = datetime.strptime(time_combines, "%Y/%m/%d %H:%M")
Weekafter_time = Weekbefore_time + timedelta(days=(futureweeks+1)*7)

#For Future Weeks
#Keep adding days until it reaches next Sunday
for count in range(0, futureweeks):
if Weekafter_time.weekday() == 6:
Weekafter_time -= timedelta(1)
while Weekafter_time.weekday() !=6:
Weekafter_time -= timedelta(1)
Weekafter_time_str = datetime.strftime(Weekafter_time, "%b%d.%Y")
url = 'http://www.forexfactory.com/calendar.php' + '?week=' + Weekafter_time_str
print("Downloading Future Week's Calendar (" + Weekafter_time_str + ") from Forex Factory...")
Year = datetime.strftime(Weekafter_time, "%Y")
Table_futureweek = Process_Week(url,Year)
Weekafter.append(Table_futureweek)

#For Previous Weeks
#Keep removing days until it reaches last Sunday
for count in range(0, limitweeks):
if Weekbefore_time.weekday() == 6:
Weekbefore_time -= timedelta(1)
while Weekbefore_time.weekday() !=6:
Weekbefore_time -= timedelta(1)
Weekbefore_time_str = datetime.strftime(Weekbefore_time, "%b%d.%Y")
url = 'http://www.forexfactory.com/calendar.php' + '?week=' + Weekbefore_time_str
print("Downloading Previous Week's Calendar (" + Weekbefore_time_str + ") from Forex Factory...")
Year = datetime.strftime(Weekbefore_time, "%Y")
Table_lastweek = Process_Week(url,Year)
Weekbefore.append(Table_lastweek)

#Remove all medium and low impact if option given
#Organize tiomes to 24H
if HighImpactOnly:
Table_1 =
for row in Table_thisweek:
if (row[3] != '\"L\"') and (row[3] != '\"M\"') and (row[3] != '\"\"')and (row[3] != '\"N\"' and (row[1] != '\"00:01\"')):
Table_1.append(row)
Table_thisweek = Table_1


#for python 3
if sys.version_info[0] == 3:
#Write the current weeks caneldnar to csv file
with open(CurrentWeek,'w', newline='') as f:
wr = csv.writer(f, delimiter=',',lineterminator='\r\n', quotechar="'")
wr.writerows(Table_thisweek)
f.close()
#for python 2
elif sys.version_info[0] == 2:
#Write the current weeks caneldnar to csv file
with open(CurrentWeek,'wb') as f:
wr = csv.writer(f, delimiter=',',lineterminator='\r\n', quotechar="'")
wr.writerows(Table_thisweek)
f.close()


#Join future weeks, previois weeks, and current week in list NewArchive_Table += Table_thisweek
NewArchive_Table =
if limitweeks !=0:
for count in range(limitweeks-1, -1, -1):
NewArchive_Table += Weekbefore[count]
NewArchive_Table += Table_thisweek
else:
NewArchive_Table = Table_thisweek

if futureweeks !=0:
for count in range(futureweeks-1, -1, -1):
NewArchive_Table += Weekafter[count]

if HighImpactOnly:
#Remove all medium and low impact if option given
#Organize tiomes to 24H
Table_1 =
for row in NewArchive_Table:#Table_thisweek:
if (row[3] != '\"L\"') and (row[3] != '\"M\"') and (row[3] != '\"\"')and (row[3] != '\"N\"' and (row[1] != '\"00:01\"')):
Table_1.append(row)
NewArchive_Table = Table_1

#for python 3
if sys.version_info[0] == 3:
#write/overwirte the archive file
with open(Archive,'w', newline='') as f:
wr = csv.writer(f, delimiter=',',lineterminator='\r\n', quotechar="'")
wr.writerows(NewArchive_Table)
f.close()
#for python 2
elif sys.version_info[0] == 2:
#write/overwirte the archive file
with open(Archive,'wb') as f:
wr = csv.writer(f, delimiter=',',lineterminator='\r\n', quotechar="'")
wr.writerows(NewArchive_Table)
f.close()
print("Downlaod Complete Successfully!")
print("Current week's calendar stored in: " + CurrentWeek)
print("Archive calendar stored in: " + Archive)

time.sleep(3)
Your first 100 lines can be condensed using the argparse module. I would strongly suggest you read this.

Quote:
Error:
in_time = datetime.strptime(String , "%Y %a%b %d")
it looks like your String is not as specified
(Sep-14-2017, 02:48 PM)metulburr Wrote: [ -> ]it looks like your String is not as specified

Are you able to please provide me the code on what would be the correct code for it to compile successfully?
Actually, you need to provide info on what your String looks like.