May-25-2019, 01:56 PM
(This post was last modified: May-25-2019, 01:56 PM by michalmonday.)
It took some tweaks to get it right, but I hope it's working well now:)
import pandas as pd def get_team_names(df): return set(df['H_team']) # this is assuming that every team plays at least 1 home game df = pd.DataFrame([ ["Gothenburg", "Malmo", 2010,"A"], ["Malmo","Gothenburg", 2010, "D"], ["Malmo", "Gothenburg", 2010, "A"], ["Gothenburg", "Malmo", 2010, "H"], ["foo", "bar", 2010,"H"], ["bar","foo", 2010, "H"], ["bar", "foo", 2010, "D"], ["foo", "bar", 2010, "H"], ["Gothenburg", "Malmo" ,2011, "D"], ["Gothenburg", "Malmo", 2011, "A"], ["Gothenburg", "Malmo", 2011, "H"], ["Malmo", "Gothenburg", 2011, "A"], ["foo", "bar" ,2011, "A"], ["foo", "bar", 2011, "A"], ["foo", "bar", 2011, "D"], ["bar", "foo", 2011, "H"], ["foo", "bar", 2012, "D"], ["bar", "foo", 2012, "D"], [ "bar","foo", 2012, "A"], [ "bar", "foo",2012, "A"], ["Gothenburg", "Malmo", 2012, "H"], ["Malmo", "Gothenburg", 2012, "H"], [ "Malmo","Gothenburg", 2012, "D"], [ "Malmo", "Gothenburg",2012, "H"], ["Malmo", "Gothenburg", 2013, "H"], [ "Gothenburg","Malmo", 2013, "D"], [ "Malmo", "Gothenburg",2013, "H"], ["bar", "foo", 2013, "D"], [ "bar","foo", 2013, "H"], [ "foo", "bar",2013, "A"], ]) df.columns = ['H_team', 'A_team', 'Year', 'Result'] df['H_points'] = 0 df['A_points'] = 0 for year in set(df['Year']): for team in get_team_names(df): draws = (df['Result']=='D').where((df['Year'] == year) & ((df['H_team'] == team) | (df['A_team'] == team))) h_wins = (df['Result']=='H').where((df['H_team'] == team) & (df['Year'] == year)) a_wins = (df['Result']=='A').where((df['A_team'] == team) & (df['Year'] == year)) team_points = (h_wins.add(a_wins, fill_value=0) * 3).add(draws, fill_value=0).cumsum().shift(1) df['H_points'] = df['H_points'].add( team_points.where(df['H_team'] == team), fill_value=0) df['A_points'] = df['A_points'].add( team_points.where(df['A_team'] == team), fill_value=0) fgi = df.index[(df['Year'] == year)][0] # first game (of the season) index df.loc[fgi, ('A_points', 'H_points')] = 0 # this is necessary because: ''' without these lines: 23 Malmo Gothenburg 2012 H 4.0 4.0 24 Malmo Gothenburg 2013 H 7.0 4.0 with these lines: 23 Malmo Gothenburg 2012 H 4.0 4.0 24 Malmo Gothenburg 2013 H 0.0 0.0 ''' print(df)
Output: H_team A_team Year Result H_points A_points
0 Gothenburg Malmo 2010 A 0.0 0.0
1 Malmo Gothenburg 2010 D 3.0 0.0
2 Malmo Gothenburg 2010 A 4.0 1.0
3 Gothenburg Malmo 2010 H 4.0 4.0
4 foo bar 2010 H 0.0 0.0
5 bar foo 2010 H 0.0 3.0
6 bar foo 2010 D 3.0 3.0
7 foo bar 2010 H 4.0 4.0
8 Gothenburg Malmo 2011 D 0.0 0.0
9 Gothenburg Malmo 2011 A 1.0 1.0
10 Gothenburg Malmo 2011 H 1.0 4.0
11 Malmo Gothenburg 2011 A 4.0 4.0
12 foo bar 2011 A 0.0 0.0
13 foo bar 2011 A 0.0 3.0
14 foo bar 2011 D 0.0 6.0
15 bar foo 2011 H 7.0 1.0
16 foo bar 2012 D 0.0 0.0
17 bar foo 2012 D 1.0 1.0
18 bar foo 2012 A 2.0 2.0
19 bar foo 2012 A 2.0 5.0
20 Gothenburg Malmo 2012 H 0.0 0.0
21 Malmo Gothenburg 2012 H 0.0 3.0
22 Malmo Gothenburg 2012 D 3.0 3.0
23 Malmo Gothenburg 2012 H 4.0 4.0
24 Malmo Gothenburg 2013 H 0.0 0.0
25 Gothenburg Malmo 2013 D 0.0 3.0
26 Malmo Gothenburg 2013 H 4.0 1.0
27 bar foo 2013 D 0.0 0.0
28 bar foo 2013 H 1.0 1.0
29 foo bar 2013 A 1.0 4.0