首先,您需要从数据中删除 NaN 行。
完整代码如下:
import pandas as pd
import numpy as np
# Sample input: one row per (Name, day) with the predicted and actual values.
df = pd.DataFrame(
    {
        'Name': ['Nir'] * 4 + ['Xyc'] * 3,
        'PREDICTED': [100, 200, 100, 200, 100, 200, 300],
        'ACTUAL': [400] * 4 + [500] * 3,
        'YYYY_MM_DD': [
            '2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
            '2020-01-01', '2020-01-02', '2020-01-03',
        ],
    }
)
def calculate(item, frame=None):
    """Build the month-to-date and payout table for a single name.

    Rows whose ``Name`` equals *item* are selected, rows containing NaN are
    dropped, and two columns are added:

    * ``MONTH_TO_DATE`` - running total of ``ACTUAL`` in row order.
    * ``PAYOUT`` - ``(ACTUAL - PREDICTED) / 100`` truncated to an integer
      once ``MONTH_TO_DATE`` has reached the total of ``PREDICTED`` for the
      name, and ``0`` before that.

    Args:
        item: Value compared against the ``Name`` column.
        frame: DataFrame to read from. It must provide the ``Name``,
            ``PREDICTED`` and ``ACTUAL`` columns. When omitted, the
            module-level ``df`` is used.

    Returns:
        A new DataFrame holding the selected rows (original index kept)
        with ``MONTH_TO_DATE`` inserted at column position 3 and ``PAYOUT``
        at column position 5. The input frame is not modified.
    """
    if frame is None:
        frame = df

    # select name
    rows = frame[frame['Name'] == item]

    # The PREDICTED total is taken before the NaN rows are removed, so a row
    # that is dropped below still contributes its PREDICTED value here.
    predicted_total = rows['PREDICTED'].sum()

    # remove NaN rows (returns a new frame, so the inserts below never touch
    # the caller's data)
    rows = rows.dropna()

    # Running total of ACTUAL. Position 3 places the column right after
    # ACTUAL for the Name/PREDICTED/ACTUAL/YYYY_MM_DD layout.
    rows.insert(3, "MONTH_TO_DATE", rows['ACTUAL'].cumsum(), allow_duplicates=True)

    # Payout is only granted once the running total has reached the
    # predicted total; before that it is 0.
    payout = ((rows['ACTUAL'] - rows['PREDICTED']) / 100).astype(int)
    reached = rows['MONTH_TO_DATE'] >= predicted_total
    rows.insert(5, "PAYOUT", np.where(reached, payout, 0), allow_duplicates=True)
    return rows
# Collect the per-name tables into a single result frame.
# DataFrame.append was removed in pandas 2.0, so the pieces are gathered in a
# list and joined with one pd.concat call (row index of each piece is kept).
result_columns = ['Name','PREDICTED','ACTUAL','MONTH_TO_DATE','YYYY_MM_DD','PAYOUT']
per_name = [calculate(item) for item in df['Name'].unique()]
# pd.concat raises ValueError on an empty list; fall back to an empty frame
# that still carries the expected columns.
results = pd.concat(per_name) if per_name else pd.DataFrame(columns=result_columns)
Result:
Name PREDICTED ACTUAL MONTH_TO_DATE YYYY_MM_DD PAYOUT
0 Nir 100 400 400 2020-01-01 0
1 Nir 200 400 800 2020-01-02 2
2 Nir 100 400 1200 2020-01-03 3
3 Nir 200 400 1600 2020-01-04 2
4 Xyc 100 500 500 2020-01-01 0
5 Xyc 200 500 1000 2020-01-02 3
6 Xyc 300 500 1500 2020-01-03 2