Posts: 61
Threads: 26
Joined: Feb 2023
Aug-23-2024, 06:34 PM
(This post was last modified: Aug-23-2024, 06:34 PM by Calab.)
Consider the following Pandas dataframes...
df1 = pd.DataFrame(data = [{"id": "One", "served_sites": ["1", "2" ,"3"],},
{"id": "Two", "served_sites": ["4" ,"5" ,"6"],}])
df2 = pd.DataFrame(data = [{"site_id": "1", "name": "Toronto"},
{"site_id": "2", "name": "Calgary"},
{"site_id": "3", "name": "Chicago"},
{"site_id": "4", "name": "New York"},
{"site_id": "5", "name": "Los Angeles"},]) How can I determine if "Calgary" is or is not in the area "One"?
How can I determine if "Calgary" is or is not in the area "Two"?
How can I determine that area "Two" contains a site id that does exist in df2?
How can I determine that "Boston" is not any of the sites?
I've been wrapping my brain around this for a few hours and I can't come up with an answers. :(
Posts: 1,088
Threads: 143
Joined: Jul 2017
Aug-25-2024, 01:49 PM
(This post was last modified: Aug-25-2024, 01:49 PM by Pedroski55.)
I am no expert, but I think this will do what you want.
import pandas as pd
df1 = pd.DataFrame(data = [{"id": "One", "served_sites": ["1", "2" ,"3"],},
{"id": "Two", "served_sites": ["4" ,"5" ,"6"],}])
df2 = pd.DataFrame(data = [{"site_id": "1", "name": "Toronto"},
{"site_id": "2", "name": "Calgary"},
{"site_id": "3", "name": "Chicago"},
{"site_id": "4", "name": "New York"},
{"site_id": "5", "name": "Los Angeles"},])
# How can I determine if "Calgary" is or is not in the area "One"?
# get the site_id for any row whose name column contains Calgary
cond1 = (df2['name'] == 'Calgary')
res1 = df2[cond1].site_id.values.tolist()
cond2 = df1["id"] == "One"
res2 = df1[cond2].served_sites.values.tolist()
for v in res2[0]:
if v in res1:
print(f'{v} is in Area One and in df2')
#How can I determine if "Calgary" is or is not in the area "Two"?
# see above
#How can I determine that area "Two" contains a site id that does exist in df2?
cond3 = (df1['id'] == 'Two')
res3 = df1[cond3].served_sites.values.tolist()
for v in res3[0]:
if v in df2["site_id"].values.tolist():
print(f'{v} is in Area Two and in df2')
# How can I determine that "Boston" is not any of the sites?
cond4 = (df2['name'] == 'Boston')
res4 = df2[cond4].site_id.values.tolist()
# if True then Boston is not found in df2['name']
len(res4) == 0 # True
Posts: 6,778
Threads: 20
Joined: Feb 2020
I don't think pandas is good for that kind of thing. I would convert to dictionaries.
import pandas as pd
df1 = pd.DataFrame(
data=[
{
"id": "One",
"served_sites": ["1", "2", "3"],
},
{
"id": "Two",
"served_sites": ["4", "5", "6"],
},
]
)
df2 = pd.DataFrame(
data=[
{"site_id": "1", "name": "Toronto"},
{"site_id": "2", "name": "Calgary"},
{"site_id": "3", "name": "Chicago"},
{"site_id": "4", "name": "New York"},
{"site_id": "5", "name": "Los Angeles"},
]
)
site_names = dict(zip(df2.site_id, df2.name))
area_sites = dict(zip(df1.id, df1.served_sites))
for name, sites in area_sites.items():
area_sites[name] = [site_names.get(site, "undefined") for site in sites]
print(area_sites)
print("Is Calgary in area One?", "Calgary" in area_sites["One"])
print("Is Calgary in area Two?", "Calgary" in area_sites["Two"])
print("All sites common to areas One and Two", set(area_sites["One"]) & set(area_sites["Two"]))
Posts: 61
Threads: 26
Joined: Feb 2023
(Aug-23-2024, 06:34 PM)Calab Wrote: Consider the following Pandas dataframes...
df1 = pd.DataFrame(data = [{"id": "One", "served_sites": ["1", "2" ,"3"],},
{"id": "Two", "served_sites": ["4" ,"5" ,"6"],}])
df2 = pd.DataFrame(data = [{"site_id": "1", "name": "Toronto"},
{"site_id": "2", "name": "Calgary"},
{"site_id": "3", "name": "Chicago"},
{"site_id": "4", "name": "New York"},
{"site_id": "5", "name": "Los Angeles"},]) How can I determine if "Calgary" is or is not in the area "One"?
How can I determine if "Calgary" is or is not in the area "Two"?
How can I determine that area "Two" contains a site id that does exist in df2?
How can I determine that "Boston" is not any of the sites?
This is what I eventually ended up with on my own:
import pandas as pd
df1 = pd.DataFrame(data = [{"id": "One", "served_sites": ["1", "2" ,"3"],},
{"id": "Two", "served_sites": ["4" ,"5" ,"6"],}])
df2 = pd.DataFrame(data = [{"site_id": "1", "name": "Toronto"},
{"site_id": "2", "name": "Calgary"},
{"site_id": "3", "name": "Chicago"},
{"site_id": "4", "name": "New York"},
{"site_id": "5", "name": "Los Angeles"},])
##### Is Calgary served by area One?
find_site = "Calgary"
find_area = "One"
site_idx = df2.query("name == '{}'".format(find_site))["site_id"].values[0]
served_sites = df1.loc[df1["id"] == find_area]["served_sites"].values[0]
if site_idx in served_sites:
print("{} is served by area {}".format(find_site, find_area))
else:
print("{} is NOT served by area {}".format(find_site, find_area))
##### Is Calgary served by area Two?
find_area = "Two"
site_idx = df2.query("name == '{}'".format(find_site))["site_id"].values[0]
served_sites = df1.loc[df1["id"] == find_area]["served_sites"].values[0]
if site_idx in served_sites:
print("{} is served by area {}".format(find_site, find_area))
else:
print("{} is NOT served by area {}".format(find_site, find_area))
##### Does area Two have any invalid sites?
verify_area = "Two"
verify_served_sites = df1.loc[df1["id"] == verify_area]["served_sites"].values[0]
for s_site in verify_served_sites:
find_site = df2.loc[df2["site_id"] == s_site]["site_id"]
if find_site.empty:
print("Area {} has an invalid site id {}".format(verify_area, s_site))
##### Is Boston a site?
find_site = "Boston"
found_id = df2.query("name == '{}'".format(find_site))
if found_id.empty:
print("{} is NOT a site".format(find_site))
else:
print("{} is a site".format(find_site))
|