这里最好不要使用 apply(pd.Series),因为它很慢;更好的做法是使用 DataFrame 构造函数,然后把 NaN 转换为 0,再转换为 int:
# Sample data: a single column whose cells are dicts. The key sets differ
# between rows (only the last row has 'c683', and it lacks 'c682').
records = [
    {'c681': 1, 'c682': 2, 'c57': 4, 'c680': 5},
    {'c681': 2, 'c682': 1, 'c57': 7, 'c680': 6},
    {'c681': 4, 'c682': 2, 'c57': 8, 'c680': 2},
    {'c681': 2, 'c682': 3, 'c57': 1, 'c680': 4},
    {'c683': 3, 'c681': 5, 'c57': 0, 'c680': 3},
]
row_labels = ['e1', 'e2', 'e3', 'e4', 'e5']
df = pd.DataFrame({'column1': records}, index=row_labels)
print(df)
column1
e1 {'c680': 5, 'c682': 2, 'c57': 4, 'c681': 1}
e2 {'c680': 6, 'c682': 1, 'c57': 7, 'c681': 2}
e3 {'c680': 2, 'c682': 2, 'c57': 8, 'c681': 4}
e4 {'c680': 4, 'c682': 3, 'c57': 1, 'c681': 2}
e5 {'c683': 3, 'c680': 3, 'c57': 0, 'c681': 5}
# Expand the dict column into one column per key by handing the plain list of
# dicts to the DataFrame constructor, keeping the original row labels.
dict_rows = df['column1'].values.tolist()
expanded = pd.DataFrame(dict_rows, index=df.index)
# Keys missing from a row come out as NaN; replace them with 0 so the whole
# frame can be cast to int.
df = expanded.fillna(0).astype(int)
print(df)
c57 c680 c681 c682 c683
e1 4 5 1 2 0
e2 7 6 2 1 0
e3 8 2 4 2 0
e4 1 4 2 3 0
e5 0 3 5 0 3
# Benchmark setup: stack 1000 copies of the frame with a fresh integer index.
# The timings below index df['column1'], so this is meant to run on the
# original dict-column frame, not on the expanded one.
replicas = [df] * 1000
df = pd.concat(replicas, ignore_index=True)
In [108]: %timeit (pd.DataFrame(df['column1'].values.tolist(), index=df.index))
100 loops, best of 3: 10.1 ms per loop
In [109]: %timeit (df['column1'].apply(pd.Series))
1 loop, best of 3: 1.14 s per loop