这是您的稍作修改的版本cython
解决方案:
import numpy as np
cimport numpy as np
from libc.math cimport floor
from cython cimport boundscheck, wraparound, nonecheck, cdivision
DTYPE = np.float
ctypedef np.float_t DTYPE_t
@boundscheck(False)
@wraparound(False)
@nonecheck(False)
def interp3D(DTYPE_t[:,:,::1] v, DTYPE_t[:,:,::1] xs, DTYPE_t[:,:,::1] ys, DTYPE_t[:,:,::1] zs):
cdef int X, Y, Z
X,Y,Z = v.shape[0], v.shape[1], v.shape[2]
cdef np.ndarray[DTYPE_t, ndim=3] interpolated = np.zeros((X, Y, Z), dtype=DTYPE)
_interp3D(&v[0,0,0], &xs[0,0,0], &ys[0,0,0], &zs[0,0,0], &interpolated[0,0,0], X, Y, Z)
return interpolated
@cdivision(True)
cdef inline void _interp3D(DTYPE_t *v, DTYPE_t *x_points, DTYPE_t *y_points, DTYPE_t *z_points,
DTYPE_t *result, int X, int Y, int Z):
cdef:
int i, x0, x1, y0, y1, z0, z1, dim
DTYPE_t x, y, z, xd, yd, zd, c00, c01, c10, c11, c0, c1, c
dim = X*Y*Z
for i in range(dim):
x = x_points[i]
y = y_points[i]
z = z_points[i]
x0 = <int>floor(x)
x1 = x0 + 1
y0 = <int>floor(y)
y1 = y0 + 1
z0 = <int>floor(z)
z1 = z0 + 1
xd = (x-x0)/(x1-x0)
yd = (y-y0)/(y1-y0)
zd = (z-z0)/(z1-z0)
if x0 >= 0 and y0 >= 0 and z0 >= 0:
c00 = v[Y*Z*x0+Z*y0+z0]*(1-xd) + v[Y*Z*x1+Z*y0+z0]*xd
c01 = v[Y*Z*x0+Z*y0+z1]*(1-xd) + v[Y*Z*x1+Z*y0+z1]*xd
c10 = v[Y*Z*x0+Z*y1+z0]*(1-xd) + v[Y*Z*x1+Z*y1+z0]*xd
c11 = v[Y*Z*x0+Z*y1+z1]*(1-xd) + v[Y*Z*x1+Z*y1+z1]*xd
c0 = c00*(1-yd) + c10*yd
c1 = c01*(1-yd) + c11*yd
c = c0*(1-zd) + c1*zd
else:
c = 0
result[i] = c
结果仍然和你的一样。随机网格数据为60x60x60
我得到以下时间:
SciPy's solution: 982ms
Your cython solution: 24.7ms
Above modified cython solution: 8.17ms
所以它比你的快了近 4 倍cython
解决方案。注意
- Cython 默认对数组执行边界检查,如果您需要该功能,请打开边界检查删除
@boundscheck(False)
.
- 在上面的解决方案中,数组需要是 C 连续的
- 如果您想要上述代码的并行变体,请替换
range
代替prange
在你的for loop
.
希望这可以帮助。