Python Forum
Thread Rating:
  • 2 Vote(s) - 3.5 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Please help - Code was working!
#1
Please find attached a Python script. The script was working last week, but it no longer works.

The script pulls data from an internet news calendar (forexfactory.com/calendar.php) and outputs it as a CSV file. I have very limited Python skills and urgently need to get this script working. I have been playing around with the code and am completely lost. :( Please help. Script link: https://drive.google.com/file/d/0B9KF5ld...lOZlU/view

I get the following error message:

Python News Downloader Version 1.41
Downloading Current Week's Calendar from Forex Factory...
Error:
Traceback (most recent call last):
  File "PythonNewsDownlaoder.py", line 221, in <module>
    Table_thisweek = Process_Week(url, Year)
  File "PythonNewsDownlaoder.py", line 144, in Process_Week
    in_time = datetime.strptime(String , "%Y %a%b %d")
  File "C:\Program Files (x86)\Python35-32\lib\_strptime.py", line 510, in _strptime_datetime
    tt, fraction = _strptime(data_string, format)
  File "C:\Program Files (x86)\Python35-32\lib\_strptime.py", line 346, in _strptime
    data_string[found.end():])
ValueError: unconverted data remains:
#Version 1.41
# - Fixed small error with leap year
#Version 1.4
# - Fixed bug with wrong year in first week of next year
# - fixed error when all arguments are not given
# - added option for high impact news only
#Version 1.3
# - Added the option for future Weeks
#Version 1.2
# - Fixed bug when no news data on a particular day
#Version 1.1
# - Updated to include previous years
#Version 1.0
from bs4 import BeautifulSoup
import csv
import urllib
import os,sys, getopt
import re
import time
# urlopen lives in urllib.request on Python 3 and directly in urllib on Python 2,
# so pick the import that matches the running interpreter.
if sys.version_info[0] == 3:
    from urllib.request import urlopen
else:
    from urllib import urlopen
#set encoding to utf-8

from datetime import date, datetime, timedelta
HighImpactOnly = False
print("Python News Downloader Version 1.41 \n")


def _print_usage():
    """Print the command-line options this script understands."""
    print("Usage:")
    print("-w <number> : the number of weeks you would like to go back from current week")
    print("-f <number> : the number of weeks you would like to go forward from current week")
    print("-o <outputfile> : output the results to the filename you specify")
    print("-I : will give you high impact news only")


# Argument handling for the number of previous/future weeks to include and/or
# the output file name. Settings start as None so that, after parsing, any
# option the user did not supply can be detected and given its default.
limitweeks = None
futureweeks = None
Archive = None
CurrentWeek = None

try:
    # In the short-option string a trailing colon marks an option that
    # requires an argument; the long options all take one ("name=").
    opts, args = getopt.getopt(sys.argv[1:], "hIf:w:o:", ["weeks=", "future=", "output="])
except getopt.GetoptError as err:
    # print help information and exit:
    print(str(err))  # will print something like "option -x not recognized"
    _print_usage()
    sys.exit(2)

if not opts:
    print("Using default parameters... \n")
    opts = [('-w', '2'), ('-f', '1'), ('-o', 'news')]

for opt, arg in opts:
    if opt == '-h':
        _print_usage()
        sys.exit()
    elif opt in ("-w", "--weeks"):
        limitweeks = int(arg)
    elif opt in ("-f", "--future"):
        futureweeks = int(arg)
    elif opt == "-I":
        # Compared with == because `opt in ("-I")` is a substring test against
        # the string "-I", not membership in a one-element tuple.
        HighImpactOnly = True
    elif opt in ("-o", "--output"):
        if arg.find('.') != -1:
            # The name contains a dot: keep it as given and insert "_archive"
            # before whatever os.path.splitext reports as the extension.
            filename, file_extension = os.path.splitext(arg)
            Archive = filename + "_archive" + file_extension
            CurrentWeek = arg
        else:
            Archive = arg + "_archive.csv"
            CurrentWeek = arg + '.csv'
    else:
        _print_usage()
        sys.exit()

# Fall back to defaults for anything that was not given on the command line.
if limitweeks is None:
    print("Using default previous week parameters of 2")
    limitweeks = 2

if futureweeks is None:
    print("Using default future week parameters of 1")
    futureweeks = 1

if Archive is None:
    print("Using default output files")
    CurrentWeek = "news.csv"
    Archive = "news_archive.csv"

def remove_non_ascii(text):
    """Return *text* with every character outside the range 0x00-0x7F removed."""
    return re.sub(r'[^\x00-\x7f]', r'', text)

#function to check if time is in the right format
def isTimeFormat(input):
    """Return True if *input* parses as a 12-hour clock time such as "8:30am".

    The check is exactly "does datetime.strptime accept it with the format
    %I:%M%p"; any string that makes strptime raise ValueError yields False.
    """
    try:
        datetime.strptime(input, "%I:%M%p")
    except ValueError:
        return False
    return True

#function to output the calendar in a list given the specific forex factory url
def Process_Week(url,Get_Year):
    """Download one Forex Factory calendar page and return its events as rows.

    Parameters:
        url: address of the calendar page to fetch. Characters [-10:-7] of the
            URL are compared with "Dec" to detect a December week that runs
            into January (week URLs built elsewhere in this script end in
            "<Mon><dd>.<yyyy>").
        Get_Year: year (int or numeric string) to attach to the page's dates,
            which are parsed as weekday + month + day with no year.

    Returns:
        A list of 10-tuples: (date, time, currency, impact, title, actual,
        forecast, previous, revised, event id). Every field is a string
        already wrapped in double quotes; the date is "YYYY/MM/DD" and the
        time is 24-hour "HH:MM". Columns are combined with zip(), so the
        number of rows equals the length of the shortest column.

    Raises:
        ValueError: if a date cell does not match the format "%Y %a%b %d".
    """
    # open page and search for relevant information
    page = urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    Dates = soup.findAll("td", class_="calendar__cell calendar__date date")
    Times = soup.findAll("td", class_="calendar__cell calendar__time time")
    Currency = soup.findAll("td", class_="calendar__cell calendar__currency currency")
    Impact = soup.findAll("span", {"class" : ["low", "medium", "high", "holiday"]})
    # NOTE(review): the first four matches are discarded - presumably markup
    # that is not part of an event row (e.g. a legend/filter); confirm against
    # the live page.
    Impact = Impact[4:]
    Title = soup.findAll("span", class_="calendar__event-title")
    Actual = soup.findAll("td", class_="calendar__cell calendar__actual actual")
    Forecast = soup.findAll("td", class_="calendar__cell calendar__forecast forecast")
    Previous = soup.findAll("td", class_="calendar__cell calendar__previous previous")
    Event_id = soup.findAll("tr", attrs={"data-eventid":True})

    Dates_1 = []
    Times_1 = []
    Impact_2 = []
    Title_1 = []
    Previous_1 = []
    Revised_1 = []

    # Fill blank date cells with the most recent non-blank date above them.
    index_number = 0
    for index, row in enumerate(Dates):
        if row.text == '':
            Dates_1.append(Dates[index_number].text)
        else:
            index_number = index
            Dates_1.append(row.text)

    # Convert each date to "YYYY/MM/DD".
    date_format = "%Y %a%b %d"
    for index, item in enumerate(Dates_1):
        # Whitespace around the cell text would make strptime fail with
        # "unconverted data remains", so remove it first.
        # NOTE(review): presumed cause of the reported failure - confirm
        # against what the live page currently returns.
        item = item.strip()
        in_time = datetime.strptime(str(Get_Year) + " " + item, date_format)
        # if month is Jan and url is Dec last year, adjust year to next year
        if in_time.month == 1 and url[-10:-7] == "Dec":
            in_time = datetime.strptime(str(int(Get_Year) + 1) + " " + item, date_format)
        Dates_1[index] = '"%s"' % datetime.strftime(in_time, "%Y/%m/%d")

    # Extract the times. remove_non_ascii drops the non-ASCII
    # "current time arrow" characters the site puts in the time cell.
    # Anything that is not a clock time is replaced by the placeholder
    # "12:01am".
    index_number = 0
    for index, row in enumerate(Times):
        if row.text == '':
            # Blank cell: reuse the last cell that held a valid time.
            Caltime = remove_non_ascii(Times[index_number].text)
            if not isTimeFormat(Caltime):
                Caltime = "12:01am"
        else:
            Caltime = remove_non_ascii(row.text)
            if isTimeFormat(Caltime):
                index_number = index
            else:
                Caltime = "12:01am"
        in_time = datetime.strptime(Caltime, "%I:%M%p")
        in_time = in_time + timedelta(hours=0)  # time offset if required
        Times_1.append('"%s"' % datetime.strftime(in_time, "%H:%M"))  # change to 24H

    Currency_1 = ['"%s"' % row.text for row in Currency]

    # Map the first CSS class of each impact span to a one-letter code;
    # anything other than low/medium/high becomes "N".
    impact_codes = {'medium': '"M"', 'high': '"H"', 'low': '"L"'}
    Impact_1 = [impact_codes.get(row['class'][0], '"N"') for row in Impact]

    # Impact_1 is consumed only by events that have a title; an event with an
    # empty title gets "N" and does not advance the position in Impact_1.
    index_number = 0
    for row in Title:
        quoted_title = '"%s"' % row.text
        Title_1.append(quoted_title)
        if quoted_title == '""':
            Impact_2.append('"N"')
        else:
            Impact_2.append(Impact_1[index_number])
            index_number = index_number + 1

    Actual_1 = ['"%s"' % row.text for row in Actual]
    Forecast_1 = ['"%s"' % row.text for row in Forecast]

    for row in Previous:
        Previous_1.append('"%s"' % row.text)
        # A revised figure, when present, is read from the title attribute of
        # the element with class "revised" inside the "previous" cell.
        Revised = row.find(class_="revised")
        if Revised is not None:
            Revised_1.append('"%s"' % Revised["title"])
        else:
            Revised_1.append('""')

    Event_id_1 = ['"%s"' % row['data-eventid'] for row in Event_id]

    # organize data into table array then transpose so it can be written to csv per row
    Table = [Dates_1, Times_1, Currency_1, Impact_2, Title_1, Actual_1, Forecast_1, Previous_1, Revised_1, Event_id_1]
    return list(zip(*Table))




#download calendar for this week, then the requested future and previous weeks


def _sunday_before(moment):
    """Return *moment* moved back to the nearest Sunday strictly before its date."""
    # weekday(): Monday=0 ... Sunday=6. A Sunday steps back a full week.
    days_back = (moment.weekday() + 1) % 7 or 7
    return moment - timedelta(days=days_back)


def _high_impact_rows(rows):
    """Return the rows whose impact is not L/M/N/empty and whose time is not 00:01."""
    excluded_impacts = ('"L"', '"M"', '""', '"N"')
    return [row for row in rows
            if row[3] not in excluded_impacts and row[1] != '"00:01"']


def _write_calendar_csv(path, rows):
    """Write *rows* to *path* as CSV with CRLF line endings.

    The fields already carry their own double quotes, so the csv quote
    character is set to a single quote.
    """
    if sys.version_info[0] == 3:
        handle = open(path, 'w', newline='')
    elif sys.version_info[0] == 2:
        handle = open(path, 'wb')
    else:
        return
    # The with-statement closes the file; no explicit close() is needed.
    with handle as f:
        wr = csv.writer(f, delimiter=',', lineterminator='\r\n', quotechar="'")
        wr.writerows(rows)


print("Downloading Current Week's Calendar from Forex Factory...")
url = 'http://www.forexfactory.com/calendar.php'
Year = date.today().year
Table_thisweek = Process_Week(url, Year)
if not Table_thisweek:
    # Without at least one row there is no date to anchor the other weeks on.
    sys.exit("Error: no calendar rows could be read from Forex Factory.")

# The first row's date and time anchor the previous/future week calculation.
Last_date = Table_thisweek[0][0].replace("\"", "")
Last_time = Table_thisweek[0][1].replace("\"", "")
time_combines = Last_date + ' ' + Last_time

Weekbefore = []
Weekafter = []
Weekbefore_time = datetime.strptime(time_combines, "%Y/%m/%d %H:%M")
Weekafter_time = Weekbefore_time + timedelta(days=(futureweeks+1)*7)

#For Future Weeks
#Start past the furthest requested week and step back one Sunday at a time,
#so Weekafter is filled furthest week first.
for count in range(0, futureweeks):
    Weekafter_time = _sunday_before(Weekafter_time)
    Weekafter_time_str = datetime.strftime(Weekafter_time, "%b%d.%Y")
    url = 'http://www.forexfactory.com/calendar.php' + '?week=' + Weekafter_time_str
    print("Downloading Future Week's Calendar (" + Weekafter_time_str + ") from Forex Factory...")
    Year = datetime.strftime(Weekafter_time, "%Y")
    Table_futureweek = Process_Week(url,Year)
    Weekafter.append(Table_futureweek)

#For Previous Weeks
#Step back one Sunday at a time, so Weekbefore is filled most recent week first.
for count in range(0, limitweeks):
    Weekbefore_time = _sunday_before(Weekbefore_time)
    Weekbefore_time_str = datetime.strftime(Weekbefore_time, "%b%d.%Y")
    url = 'http://www.forexfactory.com/calendar.php' + '?week=' + Weekbefore_time_str
    print("Downloading Previous Week's Calendar (" + Weekbefore_time_str + ") from Forex Factory...")
    Year = datetime.strftime(Weekbefore_time, "%Y")
    Table_lastweek = Process_Week(url,Year)
    Weekbefore.append(Table_lastweek)

#Remove all medium and low impact if option given
if HighImpactOnly:
    Table_thisweek = _high_impact_rows(Table_thisweek)

#Write the current week's calendar to csv file
_write_calendar_csv(CurrentWeek, Table_thisweek)

#Join previous weeks, current week and future weeks in chronological order.
#A fresh list is built so that Table_thisweek itself is never extended.
NewArchive_Table = []
for count in range(limitweeks-1, -1, -1):
    NewArchive_Table += Weekbefore[count]
NewArchive_Table += Table_thisweek
for count in range(futureweeks-1, -1, -1):
    NewArchive_Table += Weekafter[count]

#Remove all medium and low impact if option given
if HighImpactOnly:
    NewArchive_Table = _high_impact_rows(NewArchive_Table)

#write/overwrite the archive file
_write_calendar_csv(Archive, NewArchive_Table)

print("Downlaod Complete Successfully!")
print("Current week's calendar stored in: " + CurrentWeek)
print("Archive calendar stored in: " + Archive)

time.sleep(3)
Reply
#2
Your first 100 lines can be condensed using the argparse module. I would strongly suggest you read this.

Quote:
Error:
in_time = datetime.strptime(String , "%Y %a%b %d")
it looks like your String is not as specified
Recommended Tutorials:
Reply
#3
(Sep-14-2017, 02:48 PM)metulburr Wrote: it looks like your String is not as specified

Could you please show me what the correct code would be so that the script runs successfully?
Reply
#4
Actually, you need to provide info on what your String looks like.
Reply


Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020