itertools.groupby 可用于对值进行分组(需要先按分组键排序)。
是否使用 numpy 数组是可选的。
import numpy as np
import itertools
# Demo data: N random values in `a`, each tagged with a group key in `b`.
N = 50
a = np.random.randint(0, 100, N)  # ints keep the printed output compact
b = np.random.randint(35, 45, N)  # group keys drawn from 35..44

# Pack both columns into one structured array so that a single index
# permutation reorders them together.
dtype = np.dtype([('a', float), ('b', int)])
# np.empty is the documented way to allocate an uninitialised array; every
# field is assigned immediately below, so no uninitialised value survives.
ab = np.empty(a.shape, dtype=dtype)
ab['a'] = a
ab['b'] = b

# Sort by the key only.  mergesort is stable, so rows sharing the same 'b'
# keep their original relative order of 'a'.  (np.sort(ab, order=['b']) is
# not used here because it ends up ordering by 'a' as well.)
I = np.argsort(b, kind='mergesort')
ab = ab[I]

# groupby only merges *adjacent* equal keys, hence the sort above.
# xx becomes a list of groups, each group a list of (a, b) records.
gp = itertools.groupby(ab, lambda x: x['b'])
xx = [list(x[1]) for x in gp]
def filled(x):
    """Stack ragged rows into a rectangular array, padding with NaN.

    Parameters
    ----------
    x : iterable of sequences
        The rows.  Each row may have a different length.

    Returns
    -------
    numpy.ndarray
        One output row per input row.  Every row is right-padded with
        ``nan`` up to the length of the longest row.  When all rows already
        have the same length nothing is padded and the dtype is whatever
        ``np.array`` infers.  An empty ``x`` yields an empty ``(0, 0)`` array.
    """
    # Materialise once: x may be a one-shot iterator, and rows may be tuples
    # or arrays, which cannot be concatenated with a list via ``+``.
    rows = [list(z) for z in x]
    if not rows:
        # max() over an empty sequence would raise ValueError.
        return np.empty((0, 0))
    M = max(len(z) for z in rows)
    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
    return np.array([z + [np.nan] * (M - len(z)) for z in rows])
# Display the groups one per column (hence the transpose): first the 'b'
# keys, then the matching 'a' values.  Short groups are padded with NaN.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(filled([[y['b'] for y in x] for x in xx]).T)
print(filled([[y['a'] for y in x] for x in xx]).T)
输出:
[[ 35. 36. 37. 38. 39. 40. 41. 42. 43. 44.]
[ 35. 36. 37. 38. 39. 40. 41. 42. 43. 44.]
[ nan 36. 37. nan 39. 40. 41. 42. 43. 44.]
[ nan 36. 37. nan 39. 40. 41. 42. 43. 44.]
...]
[[ 54. 69. 34. 28. 71. 53. 33. 19. 64. 56.]
[ 90. 52. 11. 9. 50. 53. 25. 37. 69. 56.]
[ nan 97. 31. nan 69. 35. 2. 80. 91. 54.]
[ nan 33. 87. nan 47. 90. 81. 45. 86. 57.]
...]
我使用 argsort 并指定 mergesort(稳定排序),以保持各子列表内 a 的原有顺序。
np.sort 会对 b 和 a 两者一起进行字典序排序(这与我对 order 参数的预期相反)。
另一种方法是使用 Python 字典,它同样保留了 a 的顺序。在大型数组上它可能会更慢,但它隐藏的细节更少:
import collections
# Dictionary-based grouping: append each value of `a` to the list for its
# key from `b`.  Appending in input order keeps the order of `a` in a group.
d = collections.defaultdict(list)
for k, v in zip(b, a):
    d[k].append(v)
# Emit the groups in ascending key order; iterating a dict yields its keys.
values = [d[k] for k in sorted(d)]
# print(...) with a single argument behaves the same on Python 2 and 3.
print(filled(values).T)