您可以先使用布尔索引(boolean indexing)筛选数据,然后用 groupby 配合 nunique 统计,
最后通过 join 合并结果:
# Sample frame: four items across two departments, all on the same date.
# Item 1 carries a negative stock value.
base_data = pd.DataFrame(
    {
        "DEPT": ["a", "a", "b", "b"],
        "CLAS": ["d"] * 4,
        "STOCK": [-1, 1, 2, 2],
        "DATE": pd.to_datetime(["2001-10-10"] * 4),
        "ITEM": [1, 2, 3, 4],
    }
)
print(base_data)
CLAS DATE DEPT ITEM STOCK
0 d 2001-10-10 a 1 -1
1 d 2001-10-10 a 2 1
2 d 2001-10-10 b 3 2
3 d 2001-10-10 b 4 2
# Keep only rows with positive stock, then count the distinct items in each
# (DEPT, CLAS, DATE) group; the resulting Series is named 'n_item'.
assort_size = (
    base_data.loc[base_data["STOCK"] > 0]
    .groupby(["DEPT", "CLAS", "DATE"])["ITEM"]
    .nunique()
    .rename("n_item")
)
print(assort_size)
DEPT CLAS DATE
a d 2001-10-10 1
b d 2001-10-10 2
Name: n_item, dtype: int64
# Attach each group's count to every original row, matching the frame's
# key columns against the index levels of assort_size.
print(base_data.join(assort_size, on=["DEPT", "CLAS", "DATE"]))
CLAS DATE DEPT ITEM STOCK n_item
0 d 2001-10-10 a 1 -1 1
1 d 2001-10-10 a 2 1 1
2 d 2001-10-10 b 3 2 2
3 d 2001-10-10 b 4 2 2