from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Parse the fetched page and load its first <table> into a DataFrame.
# NOTE(review): `res` must already hold the HTTP response for the page (e.g.
# the result of a `requests.get(...)` call); nothing above this point assigns
# it -- confirm where it comes from.
soup = BeautifulSoup(res.content, 'lxml')

# Only the first <table> element on the page is used.
table = soup.find_all('table')[0]

# read_html returns a list of frames; keep the first one.  The markup is
# wrapped in StringIO because passing a literal HTML string to read_html is
# deprecated.
df = pd.read_html(StringIO(str(table)))[0]
# Raise pandas' display limits so long and wide frames print without being
# truncated.
for option, value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option, value)
# Take column 0 of the scraped table bottom-up, so that every label cell
# closes the run of cells collected before it.  (Named `date_cells` rather
# than `list` so the builtin is not shadowed for the rest of the module.)
date_cells = df[0].tolist()
date_cells.reverse()
size = len(date_cells)

# Split positions: one past every label cell.  A cell counts as a label when
# its text is longer than two characters.
# NOTE(review): assumes every cell in column 0 is a string -- `len()` raises
# on NaN/numeric cells; confirm against the scraped data.
idx_list = [idx + 1 for idx, val in enumerate(date_cells) if len(val) > 2]

# Slice the reversed column into consecutive groups, each ending at a split
# position.  A trailing group is appended when cells remain after the last
# label; the `not idx_list` guard also covers the case where no label was
# found at all, which would otherwise raise IndexError on `idx_list[-1]`.
starts = [0] + idx_list
stops = idx_list + ([size] if not idx_list or idx_list[-1] != size else [])
res = [date_cells[start:stop] for start, stop in zip(starts, stops)]
# Upper-cased month name -> month number (as text, without zero padding).
month_numbers = {
    'JANUARY': '1',
    'FEBRUARY': '2',
    'MARCH': '3',
    'APRIL': '4',
    'MAY': '5',
    'JUNE': '6',
    'JULY': '7',
    'AUGUST': '8',
    'SEPTEMBER': '9',
    'OCTOBER': '10',
    'NOVEMBER': '11',
    'DECEMBER': '12',
}

# Rewrite, in place, every cell of every group that spells a month name
# (case-insensitively) as its month number; all other cells are left alone.
for group in res:
    for pos, cell in enumerate(group):
        number = month_numbers.get(cell.upper())
        if number is not None:
            group[pos] = number
# For each group, pair every cell except the last with the group's last cell
# to form a '2005-<last>-<cell>' string.  Groups with fewer than two cells
# contribute nothing.
finallist = [
    f'2005-{group[-1]}-{cell}'
    for group in res
    for cell in group[:-1]
]

# Flip the collected strings end-to-end.
finallist.reverse()
# Plain nested-list copy of the scraped table, one inner list per row.
listtemp1 = df.values.tolist()

# Cell texts (compared upper-cased) that are blanked out below.
removelist = [
    'LOCATION', 'LANCASTER', 'SPITFIRE', 'HURRICANE', 'DAKOTA', 'DATE',
    'JUNE', 'JANUARY', 'FEBRUARY', 'MARCH', 'MAY', 'JULY', 'AUGUST',
    'SEPTEMBER', 'OCTOBER', 'NOVEMBER', 'DECEMBER', 'APRIL',
]
blanked_words = set(removelist)

# Overwrite, in place, every cell whose upper-cased text is one of the words
# above with a placeholder marker.
for row in listtemp1:
    for col, cell in enumerate(row):
        if str(cell).upper() in blanked_words:
            row[col] = '0000_removable'

# A row made of exactly six placeholders is dropped; every other row is kept
# (including rows where only some cells were blanked).
dellist = ['0000_removable'] * 6
res = [row for row in listtemp1 if row != dellist]
# Build the cleaned frame from the remaining rows.  Column labels are
# title-case so they match every lookup below; the upper-case labels used
# previously made `df3['Location']`, `df3['Dakota']`, etc. raise KeyError.
df3 = pd.DataFrame(
    res,
    columns=['Date', 'Location', 'Lancaster', 'Spitfire', 'Hurricane', 'Dakota'],
)

# Put the assembled '2005-<m>-<d>' strings in front as the 'DATE' column.
# `finallist` must have exactly one entry per row of `df3`, otherwise insert()
# raises ValueError.
idx = 0
df3.insert(loc=idx, column='DATE', value=finallist)

pd.options.display.max_rows = 500

# Forward-fill any missing DATE values.  `df3` is the frame that received the
# DATE column (this line previously addressed `df`), and `.ffill()` replaces
# the deprecated `fillna(method='ffill', inplace=True)` form.
df3['DATE'] = df3['DATE'].ffill()

# Keep rows where:
#   * Location contains '- Display',
#   * Dakota contains 'D',
#   * Spitfire contains 'S' or is missing (na=True),
#   * Lancaster is not exactly 'L'.
# Missing Location/Dakota cells count as "no match" (na=False) so the mask
# stays strictly boolean.  `.copy()` detaches the selection from `df3`, which
# keeps the column assignments below from triggering SettingWithCopyWarning.
display = df3[
    df3['Location'].str.contains('- Display', na=False)
    & df3['Dakota'].str.contains('D', na=False)
    & df3['Spitfire'].str.contains('S', na=True)
    & (df3['Lancaster'] != 'L')
].copy()

# Re-render DATE from year-month-day to day-month-year text.
display['DATE'] = (
    pd.to_datetime(display['DATE'], format='%Y-%m-%d').dt.strftime('%d-%m-%Y')
)

# The Lancaster column is not needed once it has been used for filtering.
display = display.drop(columns='Lancaster')

# Drop rows where both Spitfire and Hurricane are missing.  The result is
# assigned back; it was previously computed and then discarded.
display = display.dropna(subset=['Spitfire', 'Hurricane'], how='all')

# Trailing bare expression: echoes the frame when run as a notebook cell and
# has no effect when run as a script.
display