Oct-27-2020, 02:58 AM
Good day,
I am trying to solve the next lab https://github.com/hse-aml/competitive-d...ek_4.ipynb
This is the code, but unfortunately, the following function is deprecated:
gb = sales.groupby(['shop_id', 'date_block_num'],as_index=False).agg({'item_cnt_day':{'target_shop':'sum'}})
I don't know what to do to solve, I have tried different configurations but when I solve these lines issues, others appear because of it. Can you please help me?
---------------------------------------------------------------------------
SpecificationError Traceback (most recent call last)
<ipython-input-6-1ebfd0479d3e> in <module>
13
14 # Groupby data to get shop-item-month aggregates
---> 15 gb = sales.groupby(index_cols,as_index=False).agg({'item_cnt_day':{'target':'sum'}})
16 # Fix column names
17 gb.columns = [col[0] if col[-1]=='' else col[-1] for col in gb.columns.values]
~\anaconda3\lib\site-packages\pandas\core\groupby\generic.py in aggregate(self, func, *args, **kwargs)
926 func = _maybe_mangle_lambdas(func)
927
--> 928 result, how = self._aggregate(func, *args, **kwargs)
929 if how is None:
930 return result
~\anaconda3\lib\site-packages\pandas\core\base.py in _aggregate(self, arg, *args, **kwargs)
340 # {'ra' : { 'A' : 'mean' }}
341 if isinstance(v, dict):
--> 342 raise SpecificationError("nested renamer is not supported")
343 elif isinstance(obj, ABCSeries):
344 raise SpecificationError("nested renamer is not supported")
SpecificationError: nested renamer is not supported
I am trying to solve the next lab https://github.com/hse-aml/competitive-d...ek_4.ipynb
This is the code, but unfortunately, the following function is deprecated:
gb = sales.groupby(['shop_id', 'date_block_num'],as_index=False).agg({'item_cnt_day':{'target_shop':'sum'}})
I don't know what to do to solve, I have tried different configurations but when I solve these lines issues, others appear because of it. Can you please help me?

import numpy as np import pandas as pd import sklearn import scipy.sparse import lightgbm for p in [np, pd, scipy, sklearn, lightgbm print (p.__name__, p.__version__)]
mport pandas as pd import numpy as np import gc import matplotlib.pyplot as plt %matplotlib inline pd.set_option('display.max_rows', 600) pd.set_option('display.max_columns', 50) import lightgbm as lgb from sklearn.linear_model import LinearRegression from sklearn.metrics import r2_score from tqdm import tqdm_notebook from itertools import product def downcast_dtypes(df): ''' Changes column types in the dataframe: `float64` type to `float32` `int64` type to `int32` ''' # Select columns to downcast float_cols = [c for c in df if df[c].dtype == "float64"] int_cols = [c for c in df if df[c].dtype == "int64"] # Downcast df[float_cols] = df[float_cols].astype(np.float32) df[int_cols] = df[int_cols].astype(np.int32) return df sales = pd.read_csv('../readonly/final_project_data/sales_train.csv.gz') shops = pd.read_csv('../readonly/final_project_data/shops.csv') items = pd.read_csv('../readonly/final_project_data/items.csv') item_cats = pd.read_csv('../readonly/final_project_data/item_categories.csv') sales = sales[sales['shop_id'].isin([26, 27, 28])] # Create "grid" with columns index_cols = ['shop_id', 'item_id', 'date_block_num'] # For every month we create a grid from all shops/items combinations from that month grid = [] for block_num in sales['date_block_num'].unique(): cur_shops = sales.loc[sales['date_block_num'] == block_num, 'shop_id'].unique() cur_items = sales.loc[sales['date_block_num'] == block_num, 'item_id'].unique() grid.append(np.array(list(product(*[cur_shops, cur_items, [block_num]])),dtype='int32')) # Turn the grid into a dataframe grid = pd.DataFrame(np.vstack(grid), columns = index_cols,dtype=np.int32) # Groupby data to get shop-item-month aggregates gb = sales.groupby(index_cols,as_index=False).agg({'item_cnt_day':{'target':'sum'}}) # Fix column names gb.columns = [col[0] if col[-1]=='' else col[-1] for col in gb.columns.values] # Join it to the grid all_data = pd.merge(grid, gb, how='left', on=index_cols).fillna(0) # Same as above but with shop-month aggregates gb = sales.groupby(['shop_id', 'date_block_num'],as_index=False).agg({'item_cnt_day':{'target_shop':'sum'}}) gb.columns = [col[0] if col[-1]=='' else col[-1] for col in gb.columns.values] all_data = pd.merge(all_data, gb, how='left', on=['shop_id', 'date_block_num']).fillna(0) # Same as above but with item-month aggregates gb = sales.groupby(['item_id', 'date_block_num'],as_index=False).agg({'item_cnt_day':{'target_item':'sum'}}) gb.columns = [col[0] if col[-1] == '' else col[-1] for col in gb.columns.values] all_data = pd.merge(all_data, gb, how='left', on=['item_id', 'date_block_num']).fillna(0) # Downcast dtypes from 64 to 32 bit to save memory all_data = downcast_dtypes(all_data) del grid, gb gc.collect();The mistake is:
---------------------------------------------------------------------------
SpecificationError Traceback (most recent call last)
<ipython-input-6-1ebfd0479d3e> in <module>
13
14 # Groupby data to get shop-item-month aggregates
---> 15 gb = sales.groupby(index_cols,as_index=False).agg({'item_cnt_day':{'target':'sum'}})
16 # Fix column names
17 gb.columns = [col[0] if col[-1]=='' else col[-1] for col in gb.columns.values]
~\anaconda3\lib\site-packages\pandas\core\groupby\generic.py in aggregate(self, func, *args, **kwargs)
926 func = _maybe_mangle_lambdas(func)
927
--> 928 result, how = self._aggregate(func, *args, **kwargs)
929 if how is None:
930 return result
~\anaconda3\lib\site-packages\pandas\core\base.py in _aggregate(self, arg, *args, **kwargs)
340 # {'ra' : { 'A' : 'mean' }}
341 if isinstance(v, dict):
--> 342 raise SpecificationError("nested renamer is not supported")
343 elif isinstance(obj, ABCSeries):
344 raise SpecificationError("nested renamer is not supported")
SpecificationError: nested renamer is not supported