numpy:对 gzip 压缩文件使用 fromfile



import gzip

import numpy as np
import pandas as pd

def read_best_file(file, **kwargs):
    """Load best price data into a dataframe.

    The file is read as a flat sequence of 32-byte records whose fields
    are described by ``names``, ``formats`` and ``offsets`` below.

    Parameters
    ----------
    file : file or str
        Open file object or filename, passed straight to ``np.fromfile``.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    pandas.DataFrame
        One row per record and one column per field.
    """
    names   = [ 'time', 'bid_size', 'bid_price', 'ask_size', 'ask_price' ]
    formats = [ 'u8',   'i4',       'f8',        'i4',       'f8'        ]
    offsets = [  0,      8,          12,          20,         24         ]

    # Explicit offsets describe the on-disk layout of each record.
    dt = np.dtype({
        'names': names,
        'formats': formats,
        'offsets': offsets,
    })
    return pd.DataFrame(np.fromfile(file, dt))

我想扩展此方法以处理 gzip 压缩文件。


numpy.fromfile 的文档对 file 参数的说明如下:

file : file or str
    Open file object or filename

因此,我添加了以下内容来检查 gzip 文件路径:

# Attempted extension: when given a path ending in ".gz", swap the path for
# a gzip file object before handing it on.
# NOTE(review): np.fromfile does not appear to accept this object as a
# regular open file; confirm against the numpy.fromfile documentation.
if isinstance(file, str) and file.endswith(".gz"):
    file = gzip.open(file, "r")


但这会引发以下错误:

IOError: first argument must be an open file


我怎样才能对 gzip 压缩文件调用 numpy.fromfile?


根据评论中的请求,显示检查 gzip 压缩文件的实现:

def read_best_file(file, **kwargs):
    """Load best price data into a dataframe, with gzip support.

    The data is read as a flat sequence of 32-byte records whose fields
    are described by ``names``, ``formats`` and ``offsets`` below.

    Parameters
    ----------
    file : file or str
        Open file object or filename. A string ending in ``".gz"`` is
        treated as a gzip-compressed file and decompressed in memory;
        anything else is passed straight to ``np.fromfile``.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    pandas.DataFrame
        One row per record and one column per field.
    """
    names   = [ 'time', 'bid_size', 'bid_price', 'ask_size', 'ask_price' ]
    formats = [ 'u8',   'i4',       'f8',        'i4',       'f8'        ]
    offsets = [  0,      8,          12,          20,         24         ]

    # Explicit offsets describe the on-disk layout of each record.
    dt = np.dtype({
        'names': names,
        'formats': formats,
        'offsets': offsets,
    })

    if isinstance(file, str) and file.endswith(".gz"):
        # A gzip file object cannot be handed to np.fromfile, so decompress
        # the whole payload and parse the resulting bytes instead.
        with gzip.open(file, "rb") as fh:
            raw = fh.read()
        # Ignore any trailing partial record rather than raising.
        n_records = len(raw) // dt.itemsize
        return pd.DataFrame(np.frombuffer(raw, dtype=dt, count=n_records))

    return pd.DataFrame(np.fromfile(file, dt))

我把 read() 的结果传给 numpy.frombuffer(),成功地从 gzip 压缩文件中读取了原始二进制数据数组。此代码适用于 Python 3.7.3,或许也适用于更早的版本。

# Example: read signed 16-bit integers from a gzipped raw binary file.

import gzip
import numpy as np

fname_gzipped = 'my_binary_data.dat.gz'
raw_dtype = np.int16
# Decompress the whole file into bytes, then reinterpret those bytes as an
# array of raw_dtype.
with gzip.open(fname_gzipped, 'rb') as f:
    from_gzipped = np.frombuffer(f.read(), dtype=raw_dtype)

# Demonstrate equivalence with direct np.fromfile() on the uncompressed copy.
fname_raw = 'my_binary_data.dat'
from_raw = np.fromfile(fname_raw, dtype=raw_dtype)

# Expected to print True: both routes should yield the same array.
print('raw binary and gunzipped are the same: {}'.format(
    np.array_equiv(from_gzipped, from_raw)))

# Expected to print False: reading the same bytes with a different dtype
# should not match.
wrong_dtype = np.uint8
binary_as_wrong_dtype = np.fromfile(fname_raw, dtype=wrong_dtype)
print('wrong dtype and gunzipped are the same: {}'.format(
    np.array_equiv(from_gzipped, binary_as_wrong_dtype)))


